From 1e65b347edf67c44cc32818c663a6069580fd1a0 Mon Sep 17 00:00:00 2001
From: xzl
Date: Mon, 16 Dec 2024 17:43:57 +0800
Subject: [PATCH] feat: update tree-sitter to 0.22.6

incremental parsing system for programming tools (development files)

Issue: https://github.com/deepin-community/sig-deepin-sysdev-team/issues/556
Log: update repo
---
 .cargo/config.toml | 2 +
 .dockerignore | 2 +
 .editorconfig | 15 +
 .gitattributes | 5 +
 .gitignore | 1 -
 CHANGELOG.md | 237 +++
 CONTRIBUTING.md | 2 +-
 Cargo.lock | 1620 +++++++++++---
 Cargo.toml | 90 +-
 Dockerfile | 10 +
 FUNDING.json | 7 +
 LICENSE | 2 +-
 Makefile | 93 +-
 Package.swift | 41 +
 README.md | 7 +-
 build.zig | 18 +
 build.zig.zon | 10 +
 cli/Cargo.toml | 116 +-
 cli/README.md | 19 +-
 cli/benches/benchmark.rs | 75 +-
 cli/build.rs | 81 +-
 cli/config/Cargo.toml | 28 +-
 cli/config/README.md | 6 +-
 cli/config/src/lib.rs | 33 +-
 cli/emscripten-version | 1 -
 cli/loader/Cargo.toml | 56 +-
 cli/loader/README.md | 4 +-
 cli/loader/build.rs | 7 +
 cli/loader/emscripten-version | 1 +
 cli/loader/src/lib.rs | 1011 +++++++--
 cli/npm/.gitignore | 1 +
 cli/npm/dsl.d.ts | 62 +-
 cli/npm/install.js | 107 +-
 cli/npm/package.json | 7 +-
 cli/src/generate/binding_files.rs | 154 --
 .../generate/build_tables/build_lex_table.rs | 127 +-
 .../build_tables/build_parse_table.rs | 194 +-
 .../build_tables/coincident_tokens.rs | 30 +-
 cli/src/generate/build_tables/item.rs | 84 +-
 .../generate/build_tables/item_set_builder.rs | 90 +-
 .../build_tables/minimize_parse_table.rs | 146 +-
 cli/src/generate/build_tables/mod.rs | 133 +-
 .../generate/build_tables/token_conflicts.rs | 133 +-
 cli/src/generate/char_tree.rs | 133 --
 cli/src/generate/dedup.rs | 14 +-
 cli/src/generate/dsl.js | 101 +-
 cli/src/generate/grammar-schema.json | 26 +-
 cli/src/generate/grammar_files.rs | 596 ++++++
 cli/src/generate/grammars.rs | 66 +-
 cli/src/generate/mod.rs | 216 +-
 cli/src/generate/nfa.rs | 292 +--
 cli/src/generate/node_types.rs | 200 +-
 cli/src/generate/parse_grammar.rs | 62 +-
 .../prepare_grammar/expand_repeats.rs | 22 +-
 .../generate/prepare_grammar/expand_tokens.rs | 363 ++--
 .../extract_default_aliases.rs | 54 +-
 .../prepare_grammar/extract_tokens.rs | 62 +-
 .../prepare_grammar/flatten_grammar.rs | 41 +-
 .../prepare_grammar/intern_symbols.rs | 52 +-
 cli/src/generate/prepare_grammar/mod.rs | 71 +-
 .../prepare_grammar/process_inlines.rs | 80 +-
 cli/src/generate/render.rs | 1013 ++++-----
 cli/src/generate/rules.rs | 154 +-
 cli/src/generate/tables.rs | 47 +-
 cli/src/generate/templates/.editorconfig | 39 +
 cli/src/generate/templates/PARSER_NAME.h | 16 +
 cli/src/generate/templates/PARSER_NAME.pc.in | 11 +
 cli/src/generate/templates/Package.swift | 47 +
 cli/src/generate/templates/__init__.py | 5 +
 cli/src/generate/templates/__init__.pyi | 1 +
 cli/src/generate/templates/alloc.h | 54 +
 cli/src/generate/templates/binding.cc | 28 -
 cli/src/generate/templates/binding.go | 13 +
 cli/src/generate/templates/binding.gyp | 23 +-
 cli/src/generate/templates/binding_test.go | 15 +
 cli/src/generate/templates/build.rs | 34 +-
 cli/src/generate/templates/cargo.toml | 21 +-
 cli/src/generate/templates/gitattributes | 11 +
 cli/src/generate/templates/gitignore | 38 +
 cli/src/generate/templates/go.mod | 5 +
 cli/src/generate/templates/grammar.js | 11 +
 cli/src/generate/templates/index.d.ts | 28 +
 cli/src/generate/templates/index.js | 18 +-
 cli/src/generate/templates/js-binding.cc | 20 +
 cli/src/generate/templates/lib.rs | 22 +-
 cli/src/generate/templates/makefile | 112 +
 cli/src/generate/templates/package.json | 44 +-
 cli/src/generate/templates/py-binding.c | 27 +
 cli/src/generate/templates/pyproject.toml | 29 +
 cli/src/generate/templates/setup.py | 60 +
 cli/src/highlight.rs | 92 +-
 cli/src/lib.rs | 6 +
 cli/src/main.rs | 1201 +++++++----
 cli/src/parse.rs | 339 ++-
 cli/src/playground.html | 8 +-
 cli/src/playground.rs | 87 +-
 cli/src/query.rs | 32 +-
 cli/src/query_testing.rs | 46 +-
 cli/src/tags.rs | 56 +-
 cli/src/test.rs | 974 +++++++--
 cli/src/test_highlight.rs | 257 ++-
 cli/src/test_tags.rs | 109 +-
 cli/src/tests/async_context_test.rs | 284 +++
 cli/src/tests/corpus_test.rs | 237 ++-
 cli/src/tests/detect_language.rs | 134 ++
 cli/src/tests/helpers/allocations.rs | 26 +-
 cli/src/tests/helpers/dirs.rs | 46 +-
 cli/src/tests/helpers/edits.rs | 19 +-
 cli/src/tests/helpers/fixtures.rs | 118 +-
 cli/src/tests/helpers/mod.rs | 5 +-
 cli/src/tests/helpers/query_helpers.rs | 102 +-
 cli/src/tests/helpers/random.rs | 10 +-
 cli/src/tests/helpers/scope_sequence.rs | 12 +-
 cli/src/tests/highlight_test.rs | 238 ++-
 cli/src/tests/language_test.rs | 65 +
 cli/src/tests/mod.rs | 8 +
 cli/src/tests/node_test.rs | 257 ++-
 cli/src/tests/parser_hang_test.rs | 105 +
 cli/src/tests/parser_test.rs | 305 ++-
 cli/src/tests/pathological_test.rs | 5 +-
 cli/src/tests/proc_macro/Cargo.toml | 14 +-
 cli/src/tests/proc_macro/src/lib.rs | 16 +-
 cli/src/tests/query_test.rs | 1476 +++++++++----
 cli/src/tests/tags_test.rs | 78 +-
 cli/src/tests/test_highlight_test.rs | 33 +-
 cli/src/tests/test_tags_test.rs | 30 +-
 cli/src/tests/text_provider_test.rs | 173 ++
 cli/src/tests/tree_test.rs | 173 +-
 cli/src/tests/wasm_language_test.rs | 254 +++
 cli/src/util.rs | 112 +-
 cli/src/wasm.rs | 210 +-
 debian/changelog | 15 +
 debian/clean | 1 +
 debian/control | 216 +-
 debian/copyright | 4 +-
 debian/gbp.conf | 2 +-
 debian/libtree-sitter0.symbols | 137 +-
 debian/patches/0001-Remove-wasm-feature.patch | 234 ++
 ...0002-Relax-clap-dependency-to-4.4.18.patch | 23 +
 ...0003-Relax-ctrlc-dependency-to-3.4.2.patch | 23 +
 ...lax-html-escape-dependency-to-0.2.12.patch | 23 +
 ...005-Relax-memchr-dependency-to-2.7.1.patch | 23 +
 ...lax-regex-syntax-dependency-to-0.8.2.patch | 23 +
 ...07-Relax-semver-dependency-to-1.0.21.patch | 23 +
 ...elax-smallbitvec-dependency-to-2.5.1.patch | 23 +
 ...x-webbrowser-dependency-to-allow-0.8.patch | 23 +
 .../0010-Relax-heck-dependency-to-0.4.0.patch | 23 +
 ...11-Relax-anstyle-dependency-to-1.0.4.patch | 23 +
 ...-using-tree-sitter-0-when-building-t.patch | 23 +
 ...013-Relax-ctor-dependency-to-0.1-0.3.patch | 23 +
 ...014-Relax-fs4-dependency-to-0.9-0.12.patch | 40 +
 ...-lib-Bump-bindgen-dependency-to-0.70.patch | 23 +
 debian/patches/series | 15 +
 debian/rules | 10 +
 debian/tests/control | 150 ++
 docs/Gemfile.lock | 27 +-
 docs/assets/css/style.scss | 14 +
 docs/assets/js/playground.js | 24 +-
 docs/index.md | 69 +-
 docs/section-2-using-parsers.md | 176 +-
 docs/section-3-creating-parsers.md | 340 ++-
 docs/section-4-syntax-highlighting.md | 51 +-
 docs/section-5-implementation.md | 6 +-
 docs/section-6-contributing.md | 22 +-
 docs/section-7-playground.html | 14 +
 docs/section-8-code-navigation-systems.md | 12 +-
 highlight/Cargo.toml | 20 +-
 highlight/README.md | 52 +-
 highlight/include/tree_sitter/highlight.h | 1 +
 highlight/src/c_lib.rs | 194 +-
 highlight/src/lib.rs | 335 ++-
 highlight/src/util.rs | 10 -
 lib/Cargo.toml | 34 +-
 lib/README.md | 3 +-
 lib/binding_rust/README.md | 58 +-
 lib/binding_rust/bindings.rs | 860 ++++----
 lib/binding_rust/build.rs | 103 +-
 lib/binding_rust/ffi.rs | 169 +-
 lib/binding_rust/lib.rs | 1895 +++++++++++------
 lib/binding_rust/util.rs | 11 +-
 lib/binding_rust/wasm_language.rs | 143 ++
 lib/binding_web/.eslintrc.js | 22 +
 lib/binding_web/README.md | 38 +-
 lib/binding_web/binding.c | 253 ++-
 lib/binding_web/binding.js | 627 ++++--
 lib/binding_web/check-artifacts-fresh.js | 18 +-
 lib/binding_web/exports.json | 109 -
 lib/binding_web/exports.txt | 108 +
 lib/binding_web/imports.js | 12 +-
 lib/binding_web/package.json | 5 +-
 lib/binding_web/suffix.js | 2 +-
 lib/binding_web/test/helper.js | 4 +
 lib/binding_web/test/language-test.js | 85 +-
 lib/binding_web/test/node-test.js | 525 +++--
 lib/binding_web/test/parser-test.js | 296 ++-
 lib/binding_web/test/query-test.js | 335 ++-
 lib/binding_web/test/tree-test.js | 193 +-
 lib/binding_web/tree-sitter-web.d.ts | 155 +-
 lib/compile_flags.txt | 7 +-
 lib/include/tree_sitter/api.h | 615 ++++--
 lib/src/alloc.c | 24 +-
 lib/src/alloc.h | 20 +-
 lib/src/array.h | 231 +-
 lib/src/atomic.h | 14 +-
 lib/src/get_changed_ranges.c | 2 +-
 lib/src/language.c | 92 +-
 lib/src/language.h | 57 +-
 lib/src/lexer.c | 27 +-
 lib/src/lexer.h | 2 +-
 lib/src/lib.c | 6 +-
 lib/src/node.c | 154 +-
 lib/src/parser.c | 312 ++-
 lib/{include/tree_sitter => src}/parser.h | 67 +-
 lib/src/query.c | 349 +--
 lib/src/stack.c | 50 +-
 lib/src/subtree.c | 157 +-
 lib/src/subtree.h | 18 +-
 lib/src/tree.c | 52 +-
 lib/src/tree_cursor.c | 321 ++-
 lib/src/tree_cursor.h | 2 +
 lib/src/wasm/stdlib-symbols.txt | 24 +
 lib/src/wasm/stdlib.c | 109 +
 lib/src/wasm/wasm-stdlib.h | 1302 +++++++++++
 lib/src/wasm_store.c | 1846 ++++++++++++++++
 lib/src/wasm_store.h | 31 +
 rustfmt.toml | 6 +
 script/benchmark.cmd | 1 +
 script/build-fuzzers | 31 +-
 script/build-wasm | 53 +-
 script/build-wasm-stdlib | 34 +
 script/cliff.toml | 72 +
 script/fetch-emscripten | 6 +-
 script/fetch-fixtures | 12 +-
 script/fetch-fixtures.cmd | 2 +-
 script/generate-bindings | 51 +-
 script/generate-fixtures | 6 +-
 script/generate-fixtures-wasm | 8 +-
 script/reproduce | 31 +-
 script/run-fuzzer | 77 +-
 script/test | 21 +-
 script/test.cmd | 3 +
 script/version | 62 -
 tags/Cargo.toml | 21 +-
 tags/README.md | 7 +-
 tags/src/c_lib.rs | 184 +-
 tags/src/lib.rs | 107 +-
 test/fixtures/error_corpus/c_errors.txt | 8 +-
 .../error_corpus/javascript_errors.txt | 20 +-
 test/fixtures/error_corpus/json_errors.txt | 54 +-
 test/fixtures/error_corpus/python_errors.txt | 4 +-
 test/fixtures/error_corpus/ruby_errors.txt | 15 +-
 .../epsilon_external_tokens/scanner.c | 2 +-
 .../scanner.c | 46 +-
 .../external_and_internal_tokens/scanner.c | 2 +-
 .../external_extra_tokens/scanner.c | 2 +-
 .../test_grammars/external_tokens/scanner.c | 2 +-
 .../scanner.c | 3 +-
 .../get_col_should_hang_not_crash/corpus.txt | 0
 .../get_col_should_hang_not_crash/grammar.js | 13 +
 .../get_col_should_hang_not_crash/scanner.c | 17 +
 .../inverted_external_token/scanner.c | 2 +-
 .../grammar.js | 31 +
 .../grammar.json | 65 -
 .../named_precedences/grammar.js | 48 +
 .../named_precedences/grammar.json | 159 --
 .../grammar.js | 15 +
 .../grammar.json | 33 -
 .../nested_inlined_rules/grammar.js | 22 +
 .../nested_inlined_rules/grammar.json | 54 -
 .../partially_resolved_conflict/grammar.js | 19 +
 .../partially_resolved_conflict/grammar.json | 58 -
 .../grammar.js | 17 +
 .../grammar.json | 63 -
 .../grammar.js | 17 +
 .../grammar.json | 63 -
 .../grammar.js | 17 +
 .../grammar.json | 63 -
 .../precedence_on_subsequence/grammar.js | 30 +
 .../precedence_on_subsequence/grammar.json | 135 --
 .../precedence_on_token/grammar.js | 36 +
 .../precedence_on_token/grammar.json | 100 -
 .../test_grammars/readme_grammar/grammar.js | 36 +
 .../test_grammars/readme_grammar/grammar.json | 67 -
 .../start_rule_is_blank/grammar.js | 7 +
 .../start_rule_is_blank/grammar.json | 6 -
 .../start_rule_is_token/grammar.js | 7 +
 .../start_rule_is_token/grammar.json | 6 -
 .../test_grammars/unicode_classes/grammar.js | 20 +
 .../unicode_classes/grammar.json | 42 -
 .../test_grammars/unused_rules/grammar.js | 27 +
 .../test_grammars/unused_rules/grammar.json | 73 -
 .../uses_current_column/grammar.js | 36 +
 .../uses_current_column/grammar.json | 69 -
 .../uses_current_column/scanner.c | 3 +-
 test/fuzz/README.md | 2 +-
 test/fuzz/fuzzer.cc | 2 +-
 test/fuzz/gen-dict.py | 31 -
 xtask/Cargo.toml | 20 +
 xtask/src/bump.rs | 282 +++
 xtask/src/main.rs | 35 +
 300 files changed, 24483 insertions(+), 9779 deletions(-)

 create mode 100644 .cargo/config.toml
 create mode 100644 .dockerignore
 create mode 100644 .editorconfig
 delete mode 100644 .gitignore
 create mode 100644 CHANGELOG.md
 mode change 120000 => 100644 CONTRIBUTING.md
 create mode 100644 Dockerfile
 create mode 100644 FUNDING.json
 create mode 100644 Package.swift
 create mode 100644 build.zig
 create mode 100644 build.zig.zon
 delete mode 100644 cli/emscripten-version
 create mode 100644 cli/loader/emscripten-version
 delete mode 100644 cli/src/generate/binding_files.rs
 delete mode 100644 cli/src/generate/char_tree.rs
 create mode 100644 cli/src/generate/grammar_files.rs
 create mode 100644 cli/src/generate/templates/.editorconfig
 create mode 100644 cli/src/generate/templates/PARSER_NAME.h
 create mode 100644 cli/src/generate/templates/PARSER_NAME.pc.in
 create mode 100644 cli/src/generate/templates/Package.swift
 create mode 100644 cli/src/generate/templates/__init__.py
 create mode 100644 cli/src/generate/templates/__init__.pyi
 create mode 100644 cli/src/generate/templates/alloc.h
 delete mode 100644 cli/src/generate/templates/binding.cc
 create mode 100644 cli/src/generate/templates/binding.go
 create mode 100644 cli/src/generate/templates/binding_test.go
 create mode 100644 cli/src/generate/templates/gitattributes
 create mode 100644 cli/src/generate/templates/gitignore
 create mode 100644 cli/src/generate/templates/go.mod
 create mode 100644 cli/src/generate/templates/grammar.js
 create mode 100644 cli/src/generate/templates/index.d.ts
 create mode 100644 cli/src/generate/templates/js-binding.cc
 create mode 100644 cli/src/generate/templates/makefile
 create mode 100644 cli/src/generate/templates/py-binding.c
 create mode 100644 cli/src/generate/templates/pyproject.toml
 create mode 100644 cli/src/generate/templates/setup.py
 create mode 100644 cli/src/tests/async_context_test.rs
 create mode 100644 cli/src/tests/detect_language.rs
 create mode 100644 cli/src/tests/language_test.rs
 create mode 100644 cli/src/tests/parser_hang_test.rs
 create mode 100644 cli/src/tests/text_provider_test.rs
 create mode 100644 cli/src/tests/wasm_language_test.rs
 create mode 100644 debian/clean
 create mode 100644 debian/patches/0001-Remove-wasm-feature.patch
 create mode 100644 debian/patches/0002-Relax-clap-dependency-to-4.4.18.patch
 create mode 100644 debian/patches/0003-Relax-ctrlc-dependency-to-3.4.2.patch
 create mode 100644 debian/patches/0004-Relax-html-escape-dependency-to-0.2.12.patch
 create mode 100644 debian/patches/0005-Relax-memchr-dependency-to-2.7.1.patch
 create mode 100644 debian/patches/0006-Relax-regex-syntax-dependency-to-0.8.2.patch
 create mode 100644 debian/patches/0007-Relax-semver-dependency-to-1.0.21.patch
 create mode 100644 debian/patches/0008-Relax-smallbitvec-dependency-to-2.5.1.patch
 create mode 100644 debian/patches/0009-Relax-webbrowser-dependency-to-allow-0.8.patch
 create mode 100644 debian/patches/0010-Relax-heck-dependency-to-0.4.0.patch
 create mode 100644 debian/patches/0011-Relax-anstyle-dependency-to-1.0.4.patch
 create mode 100644 debian/patches/0012-Report-deb-built-using-tree-sitter-0-when-building-t.patch
 create mode 100644 debian/patches/0013-Relax-ctor-dependency-to-0.1-0.3.patch
 create mode 100644 debian/patches/0014-Relax-fs4-dependency-to-0.9-0.12.patch
 create mode 100644 debian/patches/0015-lib-Bump-bindgen-dependency-to-0.70.patch
 create mode 100644 debian/patches/series
 create mode 100644 debian/tests/control
 delete mode 100644 highlight/src/util.rs
 create mode 100644 lib/binding_rust/wasm_language.rs
 create mode 100644 lib/binding_web/.eslintrc.js
 delete mode 100644 lib/binding_web/exports.json
 create mode 100644 lib/binding_web/exports.txt
 rename lib/{include/tree_sitter => src}/parser.h (69%)
 create mode 100644 lib/src/wasm/stdlib-symbols.txt
 create mode 100644 lib/src/wasm/stdlib.c
 create mode 100644 lib/src/wasm/wasm-stdlib.h
 create mode 100644 lib/src/wasm_store.c
 create mode 100644 lib/src/wasm_store.h
 create mode 100644 rustfmt.toml
 create mode 100755 script/build-wasm-stdlib
 create mode 100644 script/cliff.toml
 mode change 120000 => 100755 script/reproduce
 delete mode 100755 script/version
 create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt
 create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js
 create mode 100644 test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c
 create mode 100644 test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js
 delete mode 100644 test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json
 create mode 100644 test/fixtures/test_grammars/named_precedences/grammar.js
 delete mode 100644 test/fixtures/test_grammars/named_precedences/grammar.json
 create mode 100644 test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js
 delete mode 100644 test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.json
 create mode 100644 test/fixtures/test_grammars/nested_inlined_rules/grammar.js
 delete mode 100644 test/fixtures/test_grammars/nested_inlined_rules/grammar.json
 create mode 100644 test/fixtures/test_grammars/partially_resolved_conflict/grammar.js
 delete mode 100644 test/fixtures/test_grammars/partially_resolved_conflict/grammar.json
 create mode 100644 test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js
 delete mode 100644 test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
 create mode 100644 test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js
 delete mode 100644 test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
 create mode 100644 test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js
 delete mode 100644 test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
 create mode 100644 test/fixtures/test_grammars/precedence_on_subsequence/grammar.js
 delete mode 100644 test/fixtures/test_grammars/precedence_on_subsequence/grammar.json
 create mode 100644 test/fixtures/test_grammars/precedence_on_token/grammar.js
 delete mode 100644 test/fixtures/test_grammars/precedence_on_token/grammar.json
 create mode 100644 test/fixtures/test_grammars/readme_grammar/grammar.js
 delete mode 100644 test/fixtures/test_grammars/readme_grammar/grammar.json
 create mode 100644 test/fixtures/test_grammars/start_rule_is_blank/grammar.js
 delete mode 100644 test/fixtures/test_grammars/start_rule_is_blank/grammar.json
 create mode 100644 test/fixtures/test_grammars/start_rule_is_token/grammar.js
 delete mode 100644 test/fixtures/test_grammars/start_rule_is_token/grammar.json
 create mode 100644 test/fixtures/test_grammars/unicode_classes/grammar.js
 delete mode 100644 test/fixtures/test_grammars/unicode_classes/grammar.json
 create mode 100644 test/fixtures/test_grammars/unused_rules/grammar.js
 delete mode 100644 test/fixtures/test_grammars/unused_rules/grammar.json
 create mode 100644 test/fixtures/test_grammars/uses_current_column/grammar.js
 delete mode 100644 test/fixtures/test_grammars/uses_current_column/grammar.json
 delete mode 100644 test/fuzz/gen-dict.py
 create mode 100644 xtask/Cargo.toml
 create mode 100644 xtask/src/bump.rs
 create mode 100644 xtask/src/main.rs

diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..35049cb
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,2 @@
+[alias]
+xtask = "run --package xtask --"
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..c7f83c2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+target
+.git
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..53780b3
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+root = true
+
+[*]
+indent_style = space
+indent_size = 2
+tab_width = 8
+end_of_line = lf
+insert_final_newline = true
+
+[*.rs]
+indent_size = 4
+
+[Makefile]
+indent_style = tab
+indent_size = 8
diff --git a/.gitattributes b/.gitattributes
index 4fcce33..1d9b8cb 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,7 @@
+* text=auto eol=lf
+
 /lib/src/unicode/*.h linguist-vendored
 /lib/src/unicode/LICENSE linguist-vendored
+
+/cli/src/generate/prepare_grammar/*.json -diff
+Cargo.lock -diff
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 224e7f0..0000000
--- a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-.pc/
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..995afd9
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,237 @@
+# Changelog
+
+## [0.22.5] - 2024-04-14
+
+### Bug Fixes
+
+- **cli**: Fixed an issue where unused constants were included in generated parsers in some cases.
+
+## [0.22.4] - 2024-04-12
+
+### Bug Fixes
+
+- **cli**: Fixed an issue where redundant conditionals were included in generated lexer functions.
+
+## [0.22.3] - 2024-04-12
+
+### Performance
+
+- **cli**: Reduced the compile time of generated parsers by generating C code with fewer conditionals
+  (https://github.com/tree-sitter/tree-sitter/pull/3234)
+
+## [0.22.1] - 2024-03-10
+
+### Bug Fixes
+
+- CLI build script behavior on release
+
+## [0.22.0] - 2024-03-10
+
+### Breaking
+
+- Remove top-level `corpus` dir for tests
+  The CLI will now only look in `test/corpus` for tests
+- Remove redundant escape regex & curly brace regex preprocessing ()
+- **bindings**: Convert node bindings to NAPI ()
+- **wasm**: Make `current*`, `is*`, and `has*` methods properties ()
+- **wasm**: Keep API in line with upstream and start aligning with node ()
+
+### Features
+
+- Add xtasks to assist with bumping crates ()
+- Improve language bindings ()
+- Expose the allocator and array header files for external scanners () (see the sketch after this list)
+- Add typings for the node bindings
+- Replace `nan` with `node-addon-api` and conditionally print logs
+- **bindings**: Add more make targets
+- **bindings**: Add peerDependencies for npm
+- **bindings**: Add prebuildify to node
+- **bindings**: Remove dsl types file ()
+- **node**: Type tag the language ()
+- **test**: Add attributes for corpus tests
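The "expose the allocator and array header files" feature above is the one external-scanner authors touch directly. A minimal sketch of how a scanner might use the newly exposed headers (the grammar name `example` and the indent-stack payload are hypothetical, and the remaining scanner callbacks are omitted):

    // Hypothetical scanner.c fragment; "example" stands in for the real
    // grammar name, which determines the exported symbol names.
    #include "tree_sitter/alloc.h"  // ts_malloc / ts_free
    #include "tree_sitter/array.h"  // Array(T) and the array_* macros

    typedef Array(uint16_t) IndentStack;

    // Allocate the scanner state with the library's allocator so a host
    // application's ts_set_allocator choice is respected.
    void *tree_sitter_example_external_scanner_create(void) {
      IndentStack *stack = ts_malloc(sizeof(IndentStack));
      array_init(stack);
      return stack;
    }

    void tree_sitter_example_external_scanner_destroy(void *payload) {
      IndentStack *stack = payload;
      array_delete(stack);  // frees the array's backing storage
      ts_free(stack);
    }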
+
+### Bug Fixes
+
+- Apply some `scan-build` suggestions (unused assignment/garbage access) ()
+- Wrap `||` comparison in parentheses when `&&` is used ()
+- Ignore unused variables in the array macros ()
+- `binding.cc` overwrite should replace `PARSER_NAME` ()
+- Don't use `__declspec(dllexport)` on windows ()
+- Parsers should export the language function on windows
+- Allow the regex `v` flag ()
+- **assertions**: Case shouldn't matter for comment node detection
+- **bindings**: Editorconfig and setup.py fixes ()
+- **bindings**: Insert `types` after `main` if it exists ()
+- **bindings**: Fix template oversights ()
+- **cli**: Only output the sources with `--no-bindings` ()
+- **generate**: Add `.npmignore`, populate Swift's exclude list ()
+- **generate**: Extern allocator functions for the template don't need to be "exported" ()
+- **generate**: Camel case name in `Cargo.toml` description ()
+- **lib**: Include `api.h` so `ts_set_allocator` is visible ()
+
+### Documentation
+
+- Add GitHub user and PR info to the changelog
+- Add css for inline code ()
+- Document test attributes
+- Add `Ohm` language parser
+- Remove duplicate `the`'s ()
+- Add discord and matrix badges ()
+
+### Refactor
+
+- Rename TS_REUSE_ALLOCATOR flag ()
+- Remove extern/const where possible
+- **array**: Use pragma GCC in clang too
+- **bindings**: Remove npmignore ()
+
+### Testing
+
+- Don't use TS_REUSE_ALLOCATOR on Darwin systems ()
+- Add test case for parse stack merging with incorrect error cost bug ()
+
+### Build System and CI
+
+- Improve changelog settings ()
+- Unify crate versions via workspace ()
+- Update `cc` to remove annoying debug output ()
+- Adjust dependabot settings ()
+- Use c11 everywhere
+- Add uninstall command
+- Don't skip tests on failing lint ()
+- Remove unused deps, bump deps, and bump MSRV to 1.74.1 ()
+- **bindings**: Metadata improvements
+- **bindings**: Make everything c11 ()
+- **dependabot**: Update weekly instead of daily ()
+- **deps**: Bump the cargo group with 1 update ()
+- **deps**: Bump the cargo group with 1 update ()
+- **deps**: Bump deps & lockfile ()
+- **deps**: Bump the cargo group with 4 updates ()
+- **lint**: Detect if `Cargo.lock` needs to be updated ()
+- **lint**: Make lockfile check quiet ()
+- **swift**: Move 'cLanguageStandard' behind 'targets' ()
+
+### Other
+
+- Make Node.js language bindings context aware ()
+  They don't have any dynamic global data, so all it takes is just declaring them as such
+- Fix crash when attempting to load ancient languages via wasm ()
+- Use workspace dependencies for internal crates like Tree-sitter ()
+- Remove vendored wasmtime headers (https://github.com/tree-sitter/tree-sitter/pull/3084)
+  When building the Rust binding, wasmtime headers are now provided via cargo by the wasmtime-c-api crate.
+- Fix invalid parse stack recursive merging with mismatched error cost ()
+  Allowing this invalid merge caused an invariant to be violated later on during parsing, when handling a later error.
+- Fix regression in `subtree_compare` ()
+- docs: Add `Ohm` language parser ()
+- Delete `binding_files.rs` ()
+- **bindings**: Consistent wording ()
+- **bindings**: Ignore more artifacts ()
+
+## [0.21.0] - 2024-02-21
+
+### Breaking
+
+- Remove the apply-all-captures flag, make last-wins precedence the default
+
+  **NOTE**: This change might cause breakage in your grammar's highlight tests.
+  Just flip the order of the relevant queries, and keep in mind that the
+  last query that matches will win.
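To make the flip concrete, here is a minimal `highlights.scm` sketch (hypothetical node and capture names): under last-wins precedence, the more specific pattern must come after the general one for its capture to take effect:

    ; General pattern first, specific pattern last: the last match wins,
    ; so screaming-case identifiers end up captured as @constant.
    (identifier) @variable
    ((identifier) @constant
      (#match? @constant "^[A-Z][A-Z_]*$"))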
+
+### Features
+
+- Use lockfiles to dedup recompilation
+- Improve error message for files with an unknown grammar path ()
+- Implement first-line-regex ()
+- Error out if an empty string is in the `extras` array
+- Allow specifying an external scanner's files ()
+- Better error info when a scanner is missing required symbols
+- **cli**: Add an optional `grammar-path` argument for the playground ()
+- **cli**: Add optional `config-path` argument ()
+- **loader**: Add more commonly used default parser directories
+
+### Bug Fixes
+
+- Prettify xml output and add node position info ()
+- Inherited grammar generation
+- Properly error out when the word property is an invalid rule
+- Update schema for regex flags ()
+- Properly handle `Query.matches` when filtering out results ()
+- Sexp format edge case with quoted closed parenthesis ()
+- Always push the default files if there's no `externals`
+- Don't log NUL characters ()
+- Don't throw an error if the user uses `map` in the grammar ()
+- Remove redundant imports ()
+- **cli**: Installation via an HTTP tunnel proxy ()
+- **cli**: Don't update tests automatically if parse errors are detected ()
+- **cli**: Don't use `long` for `grammar_path`
+- **test**: Allow writing updates to tests without erroneous nodes instead of denying all of them if a single error is found
+- **test**: Edge case when parsing `UNEXPECTED`/`MISSING` nodes with an indentation level greater than 0
+- **wasm**: Remove C++ mangled symbols ()
+
+### Documentation
+
+- Create issue template ()
+- Document regex limitations
+- Mention that `token($.foo)` is illegal
+- Explicitly mention behavior of walking outside the given "root" node for a `TSTreeCursor` ()
+- Small fixes ()
+- Add `Tact` language parser ()
+- **web**: Provide deno usage information ()
+
+### Refactor
+
+- Extract regex check into a function and lower its precedence
+- `&PathBuf` -> `&Path` ()
+- Name anonymous types in api.h ()
+
+### Testing
+
+- Add quotes around bash variables ()
+- Update html tests
+
+### Build System and CI
+
+- Only create release for normal semver tags ()
+- Add useful development targets to makefile ()
+- Remove minimum glibc information in summary page ()
+- Use the native m1 mac runner ()
+- Add editorconfig ()
+- Remove symbolic links from repository ()
+- Move common Cargo.toml keys into the workspace and inherit them ()
+- Remove reviewers when drafting or closing a PR ()
+- Enable creating changelogs with git-cliff ()
+- Cache fixtures ()
+- Don't cancel jobs on master ()
+- Relax caching requirements ()
+- **deps**: Bump clap from 4.4.18 to 4.5.0 ()
+- **deps**: Bump wasmtime from v16.0.0 to v17.0.1 ()
+- **deps**: Bump wasmtime to v18.0.1 ()
+- **sanitize**: Add a timeout of 60 minutes ()
+- **sanitize**: Reduce timeout to 20 minutes ()
+
+### Other
+
+- Document preferred language for scanner ()
+- Add java and tsx to corpus tests ()
+- Provide a CLI flag to open `log.html` ()
+- Some more clippy lints ()
+- Remove deprecated query parsing mechanism ()
+- Print out the full compiler arguments run when it fails ()
+- Deprecate C++ scanners ()
+- Add some documentation to the playground page ()
+- Update relevant rust tests ()
+- Clippy lints ()
+- Error out when multiple arguments are passed to `token`/`token.immediate` ()
+- Tidying
+- Prefer turbofish syntax where possible ()
+- Use published wasmtime crates
+- Cleaner cast
+- Update `Cargo.lock`
+- Get rid of `github_issue_test` file ()
+- **cli**: Use spawn to display `emcc`'s stdout and stderr ()
+- **cli**: Warn users when a query path needed for a subcommand isn't specified in a grammar's package.json
+- **generate**: Dedup and warn about duplicate or invalid rules ()
+- **test**: Use different languages for async tests ()
+- **wasm**: Use `SIDE_MODULE=2` to silence warning ()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 120000
index 4f64371..0000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1 +0,0 @@
-docs/section-6-contributing.md
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..42bc7b7
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1 @@
+See [section-6-contributing.md](./docs/section-6-contributing.md)
diff --git a/Cargo.lock b/Cargo.lock
index 956a3f4..2f4dfac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,11 +2,23 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "ahash"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
 [[package]]
 name = "aho-corasick"
-version = "0.7.20"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
 dependencies = [
  "memchr",
 ]
@@ -20,11 +32,66 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "anstream"
+version = "0.6.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5"
+dependencies = [
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+dependencies = [
+ "anstyle",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "anyhow"
-version = "1.0.70"
+version = "1.0.82"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
+
+[[package]]
+name = "arbitrary"
+version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
 
 [[package]]
 name = "ascii"
@@ -33,21 +100,42 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16"
 
 [[package]]
-name = "atty"
-version = "0.2.14"
+name = "autocfg"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+
+[[package]]
+name = "bincode"
+version = "1.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
 dependencies = [
- "hermit-abi 0.1.19",
- "libc",
- "winapi",
+ "serde",
 ]
 
 [[package]]
-name = "autocfg"
-version = "1.1.0"
+name = "bindgen"
+version = "0.69.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
+dependencies = [
+ "bitflags 2.5.0",
+ "cexpr",
+ "clang-sys",
+ "itertools",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn",
+ "which",
+]
 
 [[package]]
 name = "bitflags"
@@ -55,23 +143,34 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "bitflags"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
+
 [[package]]
 name = "bumpalo"
-version = "3.12.0"
+version = "3.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
 [[package]]
 name = "bytes"
-version = "1.4.0"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
+checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
 
 [[package]]
 name = "cc"
-version = "1.0.79"
+version = "1.0.96"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+checksum = "065a29261d53ba54260972629f9ca6bffa69bac13cd1fed61420f7fa68b9f8bd"
+dependencies = [
+ "jobserver",
+ "libc",
+ "once_cell",
+]
 
 [[package]]
 name = "cesu8"
@@ -79,38 +178,95 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
 
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
+
 [[package]]
 name = "chunked_transfer"
-version = "1.4.1"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901"
+
+[[package]]
+name = "clang-sys"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a"
+checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
 
 [[package]]
 name = "clap"
-version = "2.34.0"
+version = "4.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
 dependencies = [
- "ansi_term",
- "atty",
- "bitflags",
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
  "strsim",
- "textwrap",
- "unicode-width",
- "vec_map",
 ]
 
+[[package]]
+name = "clap_derive"
+version = "4.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
+
 [[package]]
 name = "combine"
-version = "4.6.6"
+version = "4.6.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
+checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
 dependencies = [
  "bytes",
  "memchr",
@@ -118,9 +274,9 @@ dependencies = [
 
 [[package]]
 name = "core-foundation"
-version = "0.9.3"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
+checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
 dependencies = [
  "core-foundation-sys",
  "libc",
@@ -128,18 +284,146 @@ dependencies = [
 
 [[package]]
 name = "core-foundation-sys"
-version = "0.8.4"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
+
+[[package]]
+name = "cranelift-bforest"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b57d4f3ffc28bbd6ef1ca7b50b20126717232f97487efe027d135d9d87eb29c"
+dependencies = [
+ "cranelift-entity",
+]
+
+[[package]]
+name = "cranelift-codegen"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1f7d0ac7fd53f2c29db3ff9a063f6ff5a8be2abaa8f6942aceb6e1521e70df7"
+dependencies = [
+ "bumpalo",
+ "cranelift-bforest",
+ "cranelift-codegen-meta",
+ "cranelift-codegen-shared",
+ "cranelift-control",
+ "cranelift-entity",
+ "cranelift-isle",
+ "gimli",
+ "hashbrown 0.14.5",
+ "log",
+ "regalloc2",
+ "smallvec",
+ "target-lexicon",
+]
+
+[[package]]
+name = "cranelift-codegen-meta"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b40bf21460a600178956cb7fd900a7408c6587fbb988a8063f7215361801a1da"
+dependencies = [
+ "cranelift-codegen-shared",
+]
+
+[[package]]
+name = "cranelift-codegen-shared"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d792ecc1243b7ebec4a7f77d9ed428ef27456eeb1f8c780587a6f5c38841be19"
+
+[[package]]
+name = "cranelift-control"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cea2808043df964b73ad7582e09afbbe06a31f3fb9db834d53e74b4e16facaeb"
+dependencies = [
+ "arbitrary",
+]
+
+[[package]]
+name = "cranelift-entity"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1930946836da6f514da87625cd1a0331f3908e0de454628c24a0b97b130c4d4"
+dependencies = [
+ "serde",
+ "serde_derive",
+]
+
+[[package]]
+name = "cranelift-frontend"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5482a5fcdf98f2f31b21093643bdcfe9030866b8be6481117022e7f52baa0f2b"
+dependencies = [
+ "cranelift-codegen",
+ "log",
+ "smallvec",
+ "target-lexicon",
+]
+
+[[package]]
+name = "cranelift-isle"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f6e1869b6053383bdb356900e42e33555b4c9ebee05699469b7c53cdafc82ea"
+
+[[package]]
+name = "cranelift-native"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a91446e8045f1c4bc164b7bba68e2419c623904580d4b730877a663c6da38964"
+dependencies = [
+ "cranelift-codegen",
+ "libc",
+ "target-lexicon",
+]
+
+[[package]]
+name = "cranelift-wasm"
+version = "0.106.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8b17979b862d3b0d52de6ae3294ffe4d86c36027b56ad0443a7c8c8f921d14f"
+dependencies = [
+ "cranelift-codegen",
+ "cranelift-entity",
+ "cranelift-frontend",
+ "itertools",
+ "log",
+ "smallvec",
+ "wasmparser 0.201.0",
+ "wasmtime-types",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
+checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+dependencies = [
+ "cfg-if",
+]
 
 [[package]]
 name = "ctor"
-version = "0.1.26"
+version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096"
+checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f"
 dependencies = [
  "quote",
- "syn 1.0.109",
+ "syn",
+]
+
+[[package]]
+name = "ctrlc"
+version = "3.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345"
+dependencies = [
+ "nix",
+ "windows-sys 0.52.0",
 ]
 
@@ -156,89 +440,127 @@ checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
 
 [[package]]
 name = "dirs"
-version = "3.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309"
-dependencies = [
- "dirs-sys",
-]
-
-[[package]]
-name = "dirs"
-version = "4.0.0"
+version = "5.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
+checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
 dependencies = [
  "dirs-sys",
 ]
 
 [[package]]
 name = "dirs-sys"
-version = "0.3.7"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
+checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
 dependencies = [
  "libc",
+ "option-ext",
  "redox_users",
- "winapi",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
 name = "either"
-version = "1.8.1"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "errno"
-version = "0.3.0"
+version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0"
+checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
 dependencies = [
- "errno-dragonfly",
  "libc",
- "windows-sys",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
-name = "errno-dragonfly"
-version = "0.1.2"
+name = "fallible-iterator"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
-dependencies = [
- "cc",
- "libc",
-]
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
 
 [[package]]
 name = "fastrand"
-version = "1.9.0"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
+checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
+
+[[package]]
+name = "filetime"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
 dependencies = [
- "instant",
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "form_urlencoded"
-version = "1.1.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
 dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fs4"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21dabded2e32cd57ded879041205c60a4a4c4bab47bd0fd2fa8b01f30849f02b"
+dependencies = [
+ "rustix",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "getrandom"
-version = "0.2.8"
+version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
+checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
 dependencies = [
  "cfg-if",
  "libc",
  "wasi",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.28.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
+dependencies = [
+ "fallible-iterator",
+ "indexmap",
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "git2"
+version = "0.18.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70"
+dependencies = [
+ "bitflags 2.5.0",
+ "libc",
+ "libgit2-sys",
+ "log",
+ "openssl-probe",
+ "openssl-sys",
+ "url",
+]
+
 [[package]]
 name = "glob"
 version = "0.3.1"
@@ -247,24 +569,36 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
 [[package]]
 name = "hashbrown"
-version = "0.12.3"
+version = "0.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
+dependencies = [
+ "ahash",
+]
 
 [[package]]
-name = "hermit-abi"
-version = "0.1.19"
+name = "hashbrown"
+version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 dependencies = [
- "libc",
+ "ahash",
 ]
 
 [[package]]
-name = "hermit-abi"
-version = "0.3.1"
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "home"
+version = "0.5.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
+checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
+dependencies = [
+ "windows-sys 0.52.0",
+]
 
 [[package]]
 name = "html-escape"
@@ -277,15 +611,15 @@ dependencies = [
 
 [[package]]
 name = "httpdate"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 
 [[package]]
 name = "idna"
-version = "0.3.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
 dependencies = [
  "unicode-bidi",
  "unicode-normalization",
@@ -293,39 +627,41 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "1.9.3"
+version = "2.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
 dependencies = [
- "autocfg",
- "hashbrown",
+ "equivalent",
+ "hashbrown 0.14.5",
+ "serde",
 ]
 
 [[package]]
-name = "instant"
-version = "0.1.12"
+name = "indoc"
+version = "2.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
-dependencies = [
- "cfg-if",
-]
+checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
 
 [[package]]
-name = "io-lifetimes"
-version = "1.0.9"
+name = "itertools"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
 dependencies = [
- "hermit-abi 0.3.1",
- "libc",
- "windows-sys",
+ "either",
 ]
 
 [[package]]
 name = "itoa"
-version = "1.0.6"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
 
 [[package]]
 name = "jni"
@@ -340,7 +676,7 @@ dependencies = [
  "log",
  "thiserror",
  "walkdir",
- "windows-sys",
+ "windows-sys 0.45.0",
 ]
 
 [[package]]
@@ -349,11 +685,20 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
 
+[[package]]
+name = "jobserver"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "js-sys"
-version = "0.3.61"
+version = "0.3.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
+checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -365,38 +710,106 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
-name = "libc"
-version = "0.2.141"
+name = "lazycell"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 
 [[package]]
-name = "libloading"
-version = "0.7.4"
+name = "leb128"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
-dependencies = [
- "cfg-if",
- "winapi",
-]
+checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
 
 [[package]]
-name = "linux-raw-sys"
-version = "0.3.1"
+name = "libc"
+version = "0.2.154"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f"
+checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346"
 
 [[package]]
-name = "log"
-version = "0.4.17"
+name = "libgit2-sys"
+version = "0.16.2+1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
 dependencies = [
- "cfg-if",
+ "cc",
+ "libc",
+ "libssh2-sys",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
 ]
 
 [[package]]
-name = "malloc_buf"
+name = "libloading"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
+dependencies = [
+ "cfg-if",
+ "windows-targets 0.52.5",
+]
+
+[[package]]
+name = "libredox"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
+dependencies = [
+ "bitflags 2.5.0",
+ "libc",
+]
+
+[[package]]
+name = "libssh2-sys"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "libz-sys"
+version = "1.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
+
+[[package]]
+name = "log"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+
+[[package]]
+name = "mach"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "malloc_buf"
 version = "0.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb"
@@ -406,9 +819,33 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.5.0"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "memfd"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64"
+dependencies = [
+ "rustix",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 
 [[package]]
 name = "ndk-context"
@@ -416,6 +853,28 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
 
+[[package]]
+name = "nix"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
+dependencies = [
+ "bitflags 2.5.0",
+ "cfg-if",
+ "cfg_aliases",
+ "libc",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "objc"
 version = "0.2.7"
@@ -425,26 +884,71 @@ dependencies = [
  "malloc_buf",
 ]
 
+[[package]]
+name = "object"
+version = "0.32.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
+dependencies = [
+ "crc32fast",
+ "hashbrown 0.14.5",
+ "indexmap",
+ "memchr",
+]
+
 [[package]]
 name = "once_cell"
-version = "1.17.1"
+version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
 
 [[package]]
-name = "output_vt100"
-version = "0.1.3"
+name = "openssl-probe"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66"
+checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2"
 dependencies = [
- "winapi",
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
 ]
 
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
+[[package]]
+name = "paste"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
+
 [[package]]
 name = "percent-encoding"
-version = "2.2.0"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
 
 [[package]]
 name = "ppv-lite86"
@@ -454,40 +958,47 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
 [[package]]
 name = "pretty_assertions"
-version = "0.7.2"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b"
+checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66"
 dependencies = [
- "ansi_term",
- "ctor",
  "diff",
- "output_vt100",
+ "yansi",
+]
+
+[[package]]
+name = "prettyplease"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550"
+dependencies = [
+ "proc-macro2",
+ "syn",
 ]
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.56"
+version = "1.0.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435"
+checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
 dependencies = [
  "unicode-ident",
 ]
 
 [[package]]
-name = "proc_macro"
-version = "0.1.0"
+name = "psm"
+version = "0.1.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
 dependencies = [
- "proc-macro2",
- "quote",
- "rand",
- "syn 1.0.109",
+ "cc",
 ]
 
 [[package]]
 name = "quote"
-version = "1.0.26"
+version = "1.0.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
 dependencies = [
  "proc-macro2",
 ]
@@ -530,38 +1041,54 @@ checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9"
 
 [[package]]
 name = "redox_syscall"
-version = "0.2.16"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
-name = "redox_syscall"
-version = "0.3.5"
+name = "redox_users"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
+checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
 dependencies = [
- "bitflags",
+ "getrandom",
+ "libredox",
+ "thiserror",
 ]
 
 [[package]]
-name = "redox_users"
-version = "0.4.3"
+name = "regalloc2"
+version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
+checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6"
 dependencies = [
- "getrandom",
- "redox_syscall 0.2.16",
- "thiserror",
+ "hashbrown 0.13.2",
+ "log",
+ "rustc-hash",
+ "slice-group-by",
+ "smallvec",
 ]
 
 [[package]]
 name = "regex"
-version = "1.7.3"
+version = "1.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
+checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -570,9 +1097,9 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.29"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
 
 [[package]]
 name = "rustc-hash"
@@ -582,23 +1109,22 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
 [[package]]
 name = "rustix"
-version = "0.37.7"
+version = "0.38.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
 dependencies = [
- "bitflags",
+ "bitflags 2.5.0",
  "errno",
- "io-lifetimes",
  "libc",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
 name = "ryu"
-version = "1.0.13"
+version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
+checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
 
 [[package]]
 name = "same-file"
@@ -611,35 +1137,35 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.17"
+version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
+checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
 
 [[package]]
 name = "serde"
-version = "1.0.159"
+version = "1.0.200"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065"
+checksum = "ddc6f9cc94d67c0e21aaf7eda3a010fd3af78ebf6e096aa6e2e13c79749cce4f"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.159"
+version = "1.0.200"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585"
+checksum = "856f046b9400cee3c8c94ed572ecdb752444c24528c035cd35882aad6f492bcb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.13",
+ "syn",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.95"
+version = "1.0.116"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744"
+checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
 dependencies =
[ "indexmap", "itoa", @@ -647,23 +1173,62 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slice-group-by" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" + [[package]] name = "smallbitvec" -version = "2.5.1" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e" +checksum = "fcc3fc564a4b53fd1e8589628efafe57602d91bde78be18186b5f61e8faea470" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "strsim" -version = "0.8.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "1.0.109" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -671,56 +1236,41 @@ dependencies = [ ] [[package]] -name = "syn" -version = "2.0.13" +name = "target-lexicon" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" [[package]] name = "tempfile" -version = "3.5.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.3.5", "rustix", - "windows-sys", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", + "windows-sys 0.52.0", ] [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = 
"f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] @@ -752,76 +1302,139 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.5.11" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" dependencies = [ + "indexmap", "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", ] [[package]] name = "tree-sitter" -version = "0.20.10" +version = "0.22.6" dependencies = [ + "bindgen", "cc", - "lazy_static", "regex", + "wasmtime-c-api-impl", ] [[package]] name = "tree-sitter-cli" -version = "0.20.8" +version = "0.22.6" dependencies = [ "ansi_term", + "anstyle", "anyhow", - "atty", "clap", "ctor", + "ctrlc", "difference", - "dirs 3.0.2", + "dirs", + "filetime", "glob", + "heck", "html-escape", "indexmap", + "indoc", "lazy_static", "log", + "memchr", "pretty_assertions", - "proc_macro", "rand", "regex", "regex-syntax", "rustc-hash", "semver", "serde", + "serde_derive", "serde_json", "smallbitvec", "tempfile", "tiny_http", - "toml", "tree-sitter", "tree-sitter-config", "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", + "tree-sitter-tests-proc-macro", "unindent", "walkdir", + "wasmparser 0.206.0", "webbrowser", - "which", ] [[package]] name = "tree-sitter-config" -version = "0.19.0" +version = "0.22.6" dependencies = [ "anyhow", - "dirs 3.0.2", + "dirs", "serde", "serde_json", ] [[package]] name = "tree-sitter-highlight" -version = "0.20.1" +version = "0.22.6" dependencies = [ + "lazy_static", "regex", "thiserror", "tree-sitter", @@ -829,16 +1442,19 @@ dependencies = [ [[package]] name = "tree-sitter-loader" -version = 
"0.20.0" +version = "0.22.6" dependencies = [ "anyhow", "cc", - "dirs 3.0.2", + "dirs", + "fs4", + "indoc", "libloading", "once_cell", "regex", "serde", "serde_json", + "tempfile", "tree-sitter", "tree-sitter-highlight", "tree-sitter-tags", @@ -846,7 +1462,7 @@ dependencies = [ [[package]] name = "tree-sitter-tags" -version = "0.20.2" +version = "0.22.6" dependencies = [ "memchr", "regex", @@ -854,44 +1470,48 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rand", + "syn", +] + [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "unindent" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.3.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -900,21 +1520,33 @@ dependencies = [ [[package]] name = "utf8-width" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" [[package]] -name = "vec_map" -version = "0.8.2" +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -928,9 +1560,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -938,24 +1570,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -963,28 +1595,261 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "wasm-encoder" +version = "0.201.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9c7d2731df60006819b013f64ccc2019691deccf6e11a1804bc850cd6748f1a" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasmparser" +version = "0.201.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84e5df6dba6c0d7fafc63a450f1738451ed7a0b52295d83e868218fa286bf708" +dependencies = [ + "bitflags 2.5.0", + "indexmap", + "semver", +] + +[[package]] +name = "wasmparser" +version = "0.206.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39192edb55d55b41963db40fd49b0b542156f04447b5b512744a91d38567bdbc" +dependencies = [ + "ahash", + "bitflags 2.5.0", + "hashbrown 0.14.5", + "indexmap", + "semver", +] + +[[package]] +name = "wasmtime" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e300c0e3f19dc9064e3b17ce661088646c70dbdde36aab46470ed68ba58db7d" +dependencies = [ + "anyhow", + "bincode", + "bumpalo", + "cfg-if", + "gimli", + "indexmap", + "libc", + "log", + "object", + "once_cell", + "paste", + "rustix", + "serde", + "serde_derive", + "serde_json", + "target-lexicon", + "wasmparser 0.201.0", + "wasmtime-cranelift", + "wasmtime-environ", + "wasmtime-jit-icache-coherence", + "wasmtime-runtime", + "wasmtime-slab", + "windows-sys 
0.52.0", +] + +[[package]] +name = "wasmtime-asm-macros" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "110aa598e02a136fb095ca70fa96367fc16bab55256a131e66f9b58f16c73daf" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "wasmtime-c-api-impl" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be684dae96dc2d371317848f352135333fcefc3483b67ade73031f9cdbbae52e" +dependencies = [ + "anyhow", + "log", + "once_cell", + "tracing", + "wasmtime", + "wasmtime-c-api-macros", +] + +[[package]] +name = "wasmtime-c-api-macros" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab36d96e82e247a44b0500d8e1ccc103da5d24da017f5466d0d8cb6a1b2383f8" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "wasmtime-cranelift" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e923262451a4b5b39fe02f69f1338d56356db470e289ea1887346b9c7f592738" +dependencies = [ + "anyhow", + "cfg-if", + "cranelift-codegen", + "cranelift-control", + "cranelift-entity", + "cranelift-frontend", + "cranelift-native", + "cranelift-wasm", + "gimli", + "log", + "object", + "target-lexicon", + "thiserror", + "wasmparser 0.201.0", + "wasmtime-cranelift-shared", + "wasmtime-environ", + "wasmtime-versioned-export-macros", +] + +[[package]] +name = "wasmtime-cranelift-shared" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "508898cbbea0df81a5d29cfc1c7c72431a1bc4c9e89fd9514b4c868474c05c7a" +dependencies = [ + "anyhow", + "cranelift-codegen", + "cranelift-control", + "cranelift-native", + "gimli", + "object", + "target-lexicon", + "wasmtime-environ", +] + +[[package]] +name = "wasmtime-environ" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7e3f2aa72dbb64c19708646e1ff97650f34e254598b82bad5578ea9c80edd30" +dependencies = [ + "anyhow", + "bincode", + "cranelift-entity", + "gimli", + "indexmap", + "log", + "object", + "serde", + "serde_derive", + "target-lexicon", + "thiserror", + "wasmparser 0.201.0", + "wasmtime-types", +] + +[[package]] +name = "wasmtime-jit-icache-coherence" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c22ca2ef4d87b23d400660373453e274b2251bc2d674e3102497f690135e04b0" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-runtime" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1806ee242ca4fd183309b7406e4e83ae7739b7569f395d56700de7c7ef9f5eb8" +dependencies = [ + "anyhow", + "cc", + "cfg-if", + "indexmap", + "libc", + "log", + "mach", + "memfd", + "memoffset", + "paste", + "psm", + "rustix", + "sptr", + "wasm-encoder", + "wasmtime-asm-macros", + "wasmtime-environ", + "wasmtime-versioned-export-macros", + "wasmtime-wmemcheck", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-slab" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20c58bef9ce877fd06acb58f08d003af17cb05cc51225b455e999fbad8e584c0" + +[[package]] +name = "wasmtime-types" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cebe297aa063136d9d2e5b347c1528868aa43c2c8d0e1eb0eec144567e38fe0f" +dependencies = [ + "cranelift-entity", + "serde", + "serde_derive", + "thiserror", + "wasmparser 0.201.0", +] + 
+[[package]] +name = "wasmtime-versioned-export-macros" +version = "19.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffaafa5c12355b1a9ee068e9295d50c4ca0a400c721950cdae4f5b54391a2da5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "wasmtime-wmemcheck" +version = "19.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "c9a8c62e9df8322b2166d2a6f096fbec195ddb093748fd74170dcf25ef596769" [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", @@ -992,12 +1857,12 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.8.8" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "579cc485bd5ce5bfa0d738e4921dd0b956eca9800be1fd2e5257ebe95bc4617e" +checksum = "60b6f804e41d0852e16d2eaee61c7e4f7d3e8ffdb7b8ed85886aeb0791fe9fcd" dependencies = [ "core-foundation", - "dirs 4.0.0", + "home", "jni", "log", "ndk-context", @@ -1009,13 +1874,14 @@ dependencies = [ [[package]] name = "which" -version = "4.4.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" dependencies = [ "either", - "libc", + "home", "once_cell", + "rustix", ] [[package]] @@ -1036,11 +1902,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -1055,7 +1921,25 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", ] [[package]] @@ -1064,13 +1948,44 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + 
"windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -1079,38 +1994,175 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b9415ee827af173ebb3f15f9083df5a122eb93572ec28741fb153356ea2578" +dependencies = [ + "memchr", +] + +[[package]] +name = "xtask" +version = "0.1.0" +dependencies = [ + "git2", + "indoc", + "semver", + "serde", + "serde_json", + "toml", +] + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "087eca3c1eaf8c47b94d02790dd086cd594b912d2043d4de4bfdd466b3befb7c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f4b6c273f496d8fd4eaf18853e6b448760225dc030ff2c485a786859aea6393" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index f69dbc4..53caf23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,92 @@ [workspace] default-members = ["cli"] -members = ["cli", "lib"] +members = [ + "cli", + "cli/config", + "cli/loader", + "lib", + "tags", + "highlight", + "xtask", +] resolver = "2" [workspace.package] -rust-version = "1.65" +version = "0.22.6" +authors = ["Max Brunsfeld "] +edition = "2021" +rust-version = "1.74.1" +homepage = 
"https://tree-sitter.github.io/tree-sitter" +repository = "https://github.com/tree-sitter/tree-sitter" +license = "MIT" +keywords = ["incremental", "parsing"] +categories = ["command-line-utilities", "parsing"] -[profile.release] -strip = true +[profile.optimize] +inherits = "release" +strip = true # Automatically strip symbols from the binary. +lto = true # Link-time optimization. +opt-level = 3 # Optimization level 3. +codegen-units = 1 # Maximum size reduction optimizations. + +[profile.size] +inherits = "optimize" +opt-level = "s" # Optimize for size. + +[profile.profile] +inherits = "optimize" +strip = false + +[workspace.dependencies] +ansi_term = "0.12.1" +anstyle = "1.0.6" +anyhow = "1.0.82" +cc = "1.0.95" +clap = { version = "4.5.4", features = [ + "cargo", + "derive", + "env", + "help", + "unstable-styles", +] } +ctor = "0.2.8" +ctrlc = { version = "3.4.4", features = ["termination"] } +difference = "2.0.0" +dirs = "5.0.1" +filetime = "0.2.23" +fs4 = "0.8.2" +git2 = "0.18.3" +glob = "0.3.1" +heck = "0.5.0" +html-escape = "0.2.13" +indexmap = "2.2.6" +indoc = "2.0.5" +lazy_static = "1.4.0" +libloading = "0.8.3" +log = { version = "0.4.21", features = ["std"] } +memchr = "2.7.2" +once_cell = "1.19.0" +pretty_assertions = "1.4.0" +rand = "0.8.5" +regex = "1.10.4" +regex-syntax = "0.8.3" +rustc-hash = "1.1.0" +semver = "1.0.22" +serde = { version = "1.0.198", features = ["derive"] } +serde_derive = "1.0.197" +serde_json = { version = "1.0.116", features = ["preserve_order"] } +smallbitvec = "2.5.3" +tempfile = "3.10.1" +thiserror = "1.0.59" +tiny_http = "0.12.0" +toml = "0.8.12" +unindent = "0.2.3" +walkdir = "2.5.0" +wasmparser = "0.206.0" +webbrowser = "1.0.0" + +tree-sitter = { version = "0.22.6", path = "./lib" } +tree-sitter-loader = { version = "0.22.6", path = "./cli/loader" } +tree-sitter-config = { version = "0.22.6", path = "./cli/config" } +tree-sitter-highlight = { version = "0.22.6", path = "./highlight" } +tree-sitter-tags = { version = "0.22.6", path = "./tags" } diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5faedb0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM rust:1.76-buster + +WORKDIR /app + +RUN apt-get update +RUN apt-get install -y nodejs + +COPY . . 
+ +CMD cargo test --all-features diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 0000000..3360693 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0xc01246694085eF6914C527EBdFb4d8C77dfeaf8e" + } + } +} diff --git a/LICENSE b/LICENSE index 4c22002..451fe1d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2018-2021 Max Brunsfeld +Copyright (c) 2018-2024 Max Brunsfeld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 69f6f59..e021e87 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := 0.20.9 +VERSION := 0.22.6 # install directory layout PREFIX ?= /usr/local @@ -18,15 +18,19 @@ endif OBJ := $(SRC:.c=.o) # define default flags, and override to append mandatory flags -CFLAGS ?= -O3 -Wall -Wextra -Werror -override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include +ARFLAGS := rcs +CFLAGS ?= -O3 -Wall -Wextra -Wshadow -pedantic +override CFLAGS += -std=c11 -fPIC -fvisibility=hidden +override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include # ABI versioning -SONAME_MAJOR := 0 -SONAME_MINOR := 0 +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) # OS-specific bits -ifeq ($(shell uname),Darwin) +ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) SOEXT = dylib SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib @@ -37,35 +41,72 @@ else SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) endif -ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly)) +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) PCLIBDIR := $(PREFIX)/libdata/pkgconfig endif -all: libtree-sitter.a libtree-sitter.$(SOEXTVER) +all: libtree-sitter.a libtree-sitter.$(SOEXT) tree-sitter.pc libtree-sitter.a: $(OBJ) - $(AR) rcs $@ $^ + $(AR) $(ARFLAGS) $@ $^ -libtree-sitter.$(SOEXTVER): $(OBJ) +libtree-sitter.$(SOEXT): $(OBJ) $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ - ln -sf $@ libtree-sitter.$(SOEXT) - ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) +ifneq ($(STRIP),) + $(STRIP) $@ +endif + +tree-sitter.pc: tree-sitter.pc.in + sed -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +clean: + $(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) install: all - install -d '$(DESTDIR)$(LIBDIR)' - install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a - install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' + install -m644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h + install -m644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + install -m644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXT) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) - install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 lib/include/tree_sitter/*.h 
'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ - install -d '$(DESTDIR)$(PCLIBDIR)' - sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ - -e 's|=$(PREFIX)|=$${prefix}|' \ - -e 's|@PREFIX@|$(PREFIX)|' \ - tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + ln -sf libtree-sitter.$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) -clean: - rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) +uninstall: + $(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) \ + '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h \ + '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +.PHONY: all install uninstall clean + + +##### Dev targets ##### + +test: + script/fetch-fixtures + script/generate-fixtures + script/test + +test_wasm: + script/generate-fixtures-wasm + script/test-wasm + +lint: + cargo update --workspace --locked --quiet + cargo check --workspace --all-targets + cargo +nightly fmt --all --check + cargo clippy --workspace --all-targets -- -D warnings + +format: + cargo +nightly fmt --all + +changelog: + @git-cliff --config script/cliff.toml --output CHANGELOG.md --latest --github-token $(shell gh auth token) -.PHONY: all install clean +.PHONY: test test_wasm lint format changelog diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000..79084cb --- /dev/null +++ b/Package.swift @@ -0,0 +1,41 @@ +// swift-tools-version: 5.8 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "TreeSitter", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. + .library( + name: "TreeSitter", + targets: ["TreeSitter"]), + ], + targets: [ + .target(name: "TreeSitter", + path: "lib", + exclude: [ + "binding_rust", + "binding_web", + "node_modules", + "Cargo.toml", + "README.md", + "src/unicode/README.md", + "src/unicode/LICENSE", + "src/unicode/ICU_SHA", + "src/get_changed_ranges.c", + "src/tree_cursor.c", + "src/stack.c", + "src/node.c", + "src/lexer.c", + "src/parser.c", + "src/language.c", + "src/alloc.c", + "src/subtree.c", + "src/tree.c", + "src/query.c" + ], + sources: ["src/lib.c"]), + ], + cLanguageStandard: .c11 +) diff --git a/README.md b/README.md index 3439018..d378215 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # tree-sitter -[![CICD](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml) [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) +[![discord][discord]](https://discord.gg/w7nTvsVJhm) +[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org) Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: @@ -11,8 +12,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. 
It ca - **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application ## Links - - [Documentation](https://tree-sitter.github.io) - [Rust binding](lib/binding_rust/README.md) - [WASM binding](lib/binding_web/README.md) - [Command-line interface](cli/README.md) + +[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord +[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..4ac9fbd --- /dev/null +++ b/build.zig @@ -0,0 +1,18 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + var lib = b.addStaticLibrary(.{ + .name = "tree-sitter", + .target = b.standardTargetOptions(.{}), + .optimize = b.standardOptimizeOption(.{}), + }); + + lib.linkLibC(); + lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{"-std=c11"} }); + lib.addIncludePath(.{ .path = "lib/include" }); + lib.addIncludePath(.{ .path = "lib/src" }); + + lib.installHeadersDirectory(b.path("lib/include"), ".", .{}); + + b.installArtifact(lib); +} diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..c412646 --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,10 @@ +.{ + .name = "tree-sitter", + .version = "0.22.6", + .paths = .{ + "build.zig", + "build.zig.zon", + "lib/src", + "lib/include", + }, +} diff --git a/cli/Cargo.toml b/cli/Cargo.toml index f9f8ca4..fd2136a 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,82 +1,70 @@ [package] name = "tree-sitter-cli" +version.workspace = true description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.20.8" -authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"] -edition = "2021" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true [[bin]] name = "tree-sitter" path = "src/main.rs" +doc = false [[bench]] name = "benchmark" harness = false -[dependencies] -ansi_term = "0.12" -anyhow = "1.0" -atty = "0.2" -clap = "2.32" -difference = "2.0" -dirs = "3.0" -glob = "0.3.0" -html-escape = "0.2.6" -indexmap = "1" -lazy_static = "1.2.0" -regex = "1" -regex-syntax = "0.6.4" -rustc-hash = "1" -semver = "1.0" -serde = { version = "1.0.130", features = ["derive"] } -smallbitvec = "2.5.1" -tiny_http = "0.12.0" -walkdir = "2.3" -webbrowser = "0.8.3" -which = "4.1.0" - -[dependencies.tree-sitter] -version = "0.20.10" -path = "../lib" - -[dependencies.tree-sitter-config] -version = "0.19.0" -path = "config" +[features] +wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] -[dependencies.tree-sitter-highlight] -version = "0.20" -path = "../highlight" - -[dependencies.tree-sitter-loader] -version = "0.20" -path = "loader" - -[dependencies.tree-sitter-tags] -version = "0.20" -path = "../tags" - -[dependencies.serde_json] -version = "1.0" -features = ["preserve_order"] +[dependencies] +ansi_term.workspace = true +anstyle.workspace = true +anyhow.workspace = true +clap.workspace = true +ctrlc.workspace = true +difference.workspace = true +dirs.workspace = true +filetime.workspace = true +glob.workspace = true +heck.workspace = true +html-escape.workspace = true +indexmap.workspace =
true +indoc.workspace = true +lazy_static.workspace = true +log.workspace = true +memchr.workspace = true +regex.workspace = true +regex-syntax.workspace = true +rustc-hash.workspace = true +semver.workspace = true +serde.workspace = true +serde_derive.workspace = true +serde_json.workspace = true +smallbitvec.workspace = true +tiny_http.workspace = true +walkdir.workspace = true +wasmparser.workspace = true +webbrowser.workspace = true -[dependencies.log] -version = "0.4.6" -features = ["std"] +tree-sitter.workspace = true +tree-sitter-config.workspace = true +tree-sitter-highlight.workspace = true +tree-sitter-loader.workspace = true +tree-sitter-tags.workspace = true [dev-dependencies] -proc_macro = { path = "src/tests/proc_macro" } - -rand = "0.8" -tempfile = "3" -pretty_assertions = "0.7.2" -ctor = "0.1" -unindent = "0.2" +tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } -[build-dependencies] -toml = "0.5" +rand.workspace = true +tempfile.workspace = true +pretty_assertions.workspace = true +ctor.workspace = true +unindent.workspace = true diff --git a/cli/README.md b/cli/README.md index 8cdda9c..eb93bcf 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,7 +1,11 @@ -Tree-sitter CLI -=============== +# Tree-sitter CLI -[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli) +[![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com] + +[crates.io]: https://crates.io/crates/tree-sitter-cli +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723 +[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli +[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows. @@ -19,7 +23,7 @@ or with `npm`: npm install tree-sitter-cli ``` -You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest). +You can also download a pre-built binary for your platform from [the releases page]. ### Dependencies @@ -30,8 +34,11 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have ### Commands -* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information. -* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information. * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers. 
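The benchmark changes in the next hunks track the 0.22 Rust API, in which `Parser::set_language` and `Query::new` borrow a `&Language` instead of consuming it. A minimal sketch of that calling convention follows; the `tree_sitter_rust::language()` grammar crate and function are assumed here purely for illustration and are not part of this patch:

```rust
use tree_sitter::{Parser, Query};

fn main() {
    // Any generated grammar crate exposes a Language value;
    // tree-sitter-rust is an assumed example.
    let language = tree_sitter_rust::language();

    let mut parser = Parser::new();
    // Since 0.22, set_language takes &Language rather than Language.
    parser.set_language(&language).unwrap();

    // parse returns Option<Tree>; None would mean parsing was cancelled.
    let tree = parser.parse("fn main() {}", None).unwrap();
    assert!(!tree.root_node().has_error());

    // Query::new likewise borrows the language now.
    let _query = Query::new(&language, "(function_item) @fn").unwrap();
}
```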
+ +[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers +[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index efb73f3..8957227 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -1,11 +1,16 @@ +use std::{ + collections::BTreeMap, + env, fs, + path::{Path, PathBuf}, + str, + time::Instant, + usize, +}; + use anyhow::Context; use lazy_static::lazy_static; -use std::collections::BTreeMap; -use std::path::{Path, PathBuf}; -use std::time::Instant; -use std::{env, fs, str, usize}; use tree_sitter::{Language, Parser, Query}; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{CompileConfig, Loader}; include!("../src/tests/helpers/dirs.rs"); @@ -15,7 +20,7 @@ lazy_static! { static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok(); static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") - .map(|s| usize::from_str_radix(&s, 10).unwrap()) + .map(|s| s.parse::().unwrap()) .unwrap_or(5); static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap, Vec)> = { @@ -25,29 +30,29 @@ lazy_static! { let (example_paths, query_paths) = result.entry(relative_path.to_owned()).or_default(); - if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { + if let Ok(example_files) = fs::read_dir(dir.join("examples")) { example_paths.extend(example_files.filter_map(|p| { let p = p.unwrap().path(); if p.is_file() { - Some(p.to_owned()) + Some(p) } else { None } })); } - if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + if let Ok(query_files) = fs::read_dir(dir.join("queries")) { query_paths.extend(query_files.filter_map(|p| { let p = p.unwrap().path(); if p.is_file() { - Some(p.to_owned()) + Some(p) } else { None } })); } } else { - for entry in fs::read_dir(&dir).unwrap() { + for entry in fs::read_dir(dir).unwrap() { let entry = entry.unwrap().path(); if entry.is_dir() { process_dir(result, &entry); @@ -90,9 +95,9 @@ fn main() { } } - eprintln!("\nLanguage: {}", language_name); + eprintln!("\nLanguage: {language_name}"); let language = get_language(language_path); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); eprintln!(" Constructing Queries"); for path in query_paths { @@ -102,8 +107,9 @@ fn main() { } } - parse(&path, max_path_length, |source| { - Query::new(language, str::from_utf8(source).unwrap()) + parse(path, max_path_length, |source| { + Query::new(&language, str::from_utf8(source).unwrap()) + .with_context(|| format!("Query file path: {path:?}")) .expect("Failed to parse query"); }); } @@ -143,13 +149,13 @@ fn main() { } if let Some((average_normal, worst_normal)) = aggregate(&normal_speeds) { - eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); - eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + eprintln!(" Average Speed (normal): {average_normal} bytes/ms"); + eprintln!(" Worst Speed (normal): {worst_normal} bytes/ms"); } if let Some((average_error, worst_error)) = aggregate(&error_speeds) { - eprintln!(" Average Speed (errors): {} bytes/ms", average_error); - eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + eprintln!(" Average Speed (errors): {average_error} bytes/ms"); + eprintln!(" Worst Speed (errors): {worst_error} bytes/ms"); } all_normal_speeds.extend(normal_speeds); @@ -158,24 +164,24 @@ fn main() { 
eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { - eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); - eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + eprintln!(" Average Speed (normal): {average_normal} bytes/ms"); + eprintln!(" Worst Speed (normal): {worst_normal} bytes/ms"); } if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) { - eprintln!(" Average Speed (errors): {} bytes/ms", average_error); - eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + eprintln!(" Average Speed (errors): {average_error} bytes/ms"); + eprintln!(" Worst Speed (errors): {worst_error} bytes/ms"); } - eprintln!(""); + eprintln!(); } -fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { +fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> { if speeds.is_empty() { return None; } let mut total = 0; let mut max = usize::MAX; - for speed in speeds.iter().cloned() { + for speed in speeds.iter().copied() { total += speed; if speed < max { max = speed; @@ -192,23 +198,26 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> ); let source_code = fs::read(path) - .with_context(|| format!("Failed to read {:?}", path)) + .with_context(|| format!("Failed to read {path:?}")) .unwrap(); let time = Instant::now(); for _ in 0..*REPETITION_COUNT { action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = duration.as_millis(); - let speed = source_code.len() as u128 / (duration_ms + 1); - eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); + let duration_ns = duration.as_nanos(); + let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns; + eprintln!( + "time {:>7.2} ms\t\tspeed {speed:>6} bytes/ms", + (duration_ns as f64) / 1e6, + ); speed as usize } fn get_language(path: &Path) -> Language { - let src_dir = GRAMMARS_DIR.join(path).join("src"); + let src_path = GRAMMARS_DIR.join(path).join("src"); TEST_LOADER - .load_language_at_path(&src_dir, &src_dir) - .with_context(|| format!("Failed to load language at path {:?}", src_dir)) + .load_language_at_path(CompileConfig::new(&src_path, None, None)) + .with_context(|| format!("Failed to load language at path {src_path:?}")) .unwrap() } diff --git a/cli/build.rs b/cli/build.rs index 74c6d83..a29a940 100644 --- a/cli/build.rs +++ b/cli/build.rs @@ -1,27 +1,51 @@ -use std::ffi::OsStr; -use std::path::{Path, PathBuf}; -use std::{env, fs}; +use std::{ + env, + ffi::OsStr, + fs, + path::{Path, PathBuf}, + time::SystemTime, +}; fn main() { if let Some(git_sha) = read_git_sha() { - println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); + println!("cargo:rustc-env=BUILD_SHA={git_sha}"); } if web_playground_files_present() { - println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); + println!("cargo:rustc-cfg=TREE_SITTER_EMBED_WASM_BINDING"); } - let rust_binding_version = read_rust_binding_version(); - println!( - "cargo:rustc-env={}={}", - "RUST_BINDING_VERSION", rust_binding_version, - ); + let build_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs_f64(); + println!("cargo:rustc-env=BUILD_TIME={build_time}"); - let emscripten_version = fs::read_to_string("emscripten-version").unwrap(); - println!( - "cargo:rustc-env={}={}", - "EMSCRIPTEN_VERSION", emscripten_version, - ); + #[cfg(any( + target_os = "linux", + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd", + 
target_os = "dragonfly", + ))] + { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()).join("dynamic-symbols.txt"); + std::fs::write( + &out_dir, + "{ + ts_current_malloc; + ts_current_calloc; + ts_current_realloc; + ts_current_free; + };", + ) + .unwrap(); + println!( + "cargo:rustc-link-arg=-Wl,--dynamic-list={}", + out_dir.display() + ); + } } fn web_playground_files_present() -> bool { @@ -42,7 +66,8 @@ fn read_git_sha() -> Option { git_path = repo_path.join(".git"); if git_path.exists() { break; - } else if !repo_path.pop() { + } + if !repo_path.pop() { return None; } } @@ -57,10 +82,10 @@ fn read_git_sha() -> Option { } let git_head_path = git_dir_path.join("HEAD"); if let Some(path) = git_head_path.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } if let Ok(mut head_content) = fs::read_to_string(&git_head_path) { - if head_content.ends_with("\n") { + if head_content.ends_with('\n') { head_content.pop(); } @@ -71,13 +96,12 @@ fn read_git_sha() -> Option { // Go to real non-worktree gitdir let git_dir_path = git_dir_path .parent() - .map(|p| { + .and_then(|p| { p.file_name() .map(|n| n == OsStr::new("worktrees")) .and_then(|x| x.then(|| p.parent())) }) .flatten() - .flatten() .unwrap_or(&git_dir_path); let file = git_dir_path.join(&head_content); @@ -90,7 +114,7 @@ fn read_git_sha() -> Option { if let Some((hash, r#ref)) = line.split_once(' ') { if r#ref == head_content { if let Some(path) = packed_refs.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } return Some(hash.to_string()); } @@ -101,26 +125,15 @@ fn read_git_sha() -> Option { } }; if let Some(path) = ref_filename.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } return fs::read_to_string(&ref_filename).ok(); } // If we're on a detached commit, then the `HEAD` file itself contains the sha. 
- else if head_content.len() == 40 { + if head_content.len() == 40 { return Some(head_content); } } None } - -fn read_rust_binding_version() -> String { - let path = "Cargo.toml"; - let text = fs::read_to_string(path).unwrap(); - let cargo_toml = toml::from_str::<toml::Value>(text.as_ref()).unwrap(); - cargo_toml["dependencies"]["tree-sitter"]["version"] - .as_str() - .unwrap() - .trim_matches('"') - .to_string() -} diff --git a/cli/config/Cargo.toml b/cli/config/Cargo.toml index 114d6ce..8379a54 100644 --- a/cli/config/Cargo.toml +++ b/cli/config/Cargo.toml @@ -1,21 +1,19 @@ [package] name = "tree-sitter-config" +version.workspace = true description = "User configuration of tree-sitter's command line programs" -version = "0.19.0" -authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"] -edition = "2018" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true [dependencies] -anyhow = "1.0" -dirs = "3.0" -serde = { version = "1.0.130", features = ["derive"] } - -[dependencies.serde_json] -version = "1.0.45" -features = ["preserve_order"] +anyhow.workspace = true +dirs.workspace = true +serde.workspace = true +serde_json.workspace = true diff --git a/cli/config/README.md b/cli/config/README.md index 8cbfbcf..e7d7b39 100644 --- a/cli/config/README.md +++ b/cli/config/README.md @@ -1,5 +1,7 @@ -# `tree-sitter-config` +# Tree-sitter Config + +Manages Tree-sitter's configuration file. You can use a configuration file to control the behavior of the `tree-sitter` -command-line program. This crate implements the logic for finding and the +command-line program. This crate implements the logic for finding and parsing the contents of the configuration file. diff --git a/cli/config/src/lib.rs b/cli/config/src/lib.rs index 3cd09b8..6d240c5 100644 --- a/cli/config/src/lib.rs +++ b/cli/config/src/lib.rs @@ -1,10 +1,10 @@ -//! Manages tree-sitter's configuration file. +#![doc = include_str!("../README.md")] + +use std::{env, fs, path::PathBuf}; use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::path::PathBuf; -use std::{env, fs}; /// Holds the contents of tree-sitter's configuration file. /// @@ -39,7 +39,7 @@ impl Config { } let legacy_path = dirs::home_dir() - .ok_or(anyhow!("Cannot determine home directory"))? + .ok_or_else(|| anyhow!("Cannot determine home directory"))? .join(".tree-sitter") .join("config.json"); if legacy_path.is_file() { @@ -51,7 +51,7 @@ impl Config { fn xdg_config_file() -> Result<PathBuf> { let xdg_path = dirs::config_dir() - .ok_or(anyhow!("Cannot determine config directory"))? + .ok_or_else(|| anyhow!("Cannot determine config directory"))? .join("tree-sitter") .join("config.json"); Ok(xdg_path) @@ -60,21 +60,26 @@ impl Config { /// Locates and loads in the user's configuration file.
We search for the configuration file /// in the following locations, in order: /// + /// - Location specified by the path parameter if provided /// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set - /// - `tree-sitter/config.json` in your default user configuration directory, as determined - /// by [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html) + /// - `tree-sitter/config.json` in your default user configuration directory, as determined by + /// [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html) /// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store /// its configuration - pub fn load() -> Result<Config> { - let location = match Self::find_config_file()? { - Some(location) => location, - None => return Config::initial(), + pub fn load(path: Option<PathBuf>) -> Result<Self> { + let location = if let Some(path) = path { + path + } else if let Some(path) = Self::find_config_file()? { + path + } else { + return Self::initial(); }; + let content = fs::read_to_string(&location) .with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?; let config = serde_json::from_str(&content) .with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?; - Ok(Config { location, config }) + Ok(Self { location, config }) } /// Creates an empty initial configuration file. You can then use the [`Config::add`][] method @@ -83,7 +88,7 @@ impl Config { /// disk. /// /// (Note that this is typically only done by the `tree-sitter init-config` command.) - pub fn initial() -> Result<Config> { + pub fn initial() -> Result<Self> { let location = if let Ok(path) = env::var("TREE_SITTER_DIR") { let mut path = PathBuf::from(path); path.push("config.json"); @@ -92,7 +97,7 @@ impl Config { Self::xdg_config_file()? }; let config = serde_json::json!({}); - Ok(Config { location, config }) + Ok(Self { location, config }) } /// Saves this configuration to the file that it was originally loaded from.
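For orientation, here is a minimal sketch of how a caller might drive the changed `Config::load` signature above; the `load_cli_config` wrapper and its `explicit_path` argument are hypothetical illustrations, only `Config::load` itself comes from this patch:

    use std::path::PathBuf;

    use anyhow::Result;
    use tree_sitter_config::Config;

    // Hypothetical wrapper: an explicitly supplied path wins; `None` falls back
    // to $TREE_SITTER_DIR/config.json, then tree-sitter/config.json in the XDG
    // config directory, then the legacy ~/.tree-sitter/config.json, and finally
    // an empty in-memory configuration when no file exists.
    fn load_cli_config(explicit_path: Option<PathBuf>) -> Result<Config> {
        Config::load(explicit_path)
    }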
diff --git a/cli/emscripten-version b/cli/emscripten-version deleted file mode 100644 index 05b41fb..0000000 --- a/cli/emscripten-version +++ /dev/null @@ -1 +0,0 @@ -3.1.29 diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml index 6af28f3..bff2f63 100644 --- a/cli/loader/Cargo.toml +++ b/cli/loader/Cargo.toml @@ -1,37 +1,33 @@ [package] name = "tree-sitter-loader" +version.workspace = true description = "Locates, builds, and loads tree-sitter grammars at runtime" -version = "0.20.0" -authors = ["Max Brunsfeld "] -edition = "2018" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true -[dependencies] -anyhow = "1.0" -cc = "^1.0.58" -dirs = "3.0" -libloading = "0.7" -once_cell = "1.7" -regex = "1" -serde = { version = "1.0.130", features = ["derive"] } - -[dependencies.serde_json] -version = "1.0" -features = ["preserve_order"] - -[dependencies.tree-sitter] -version = "0.20" -path = "../../lib" +[features] +wasm = ["tree-sitter/wasm"] -[dependencies.tree-sitter-highlight] -version = "0.20" -path = "../../highlight" +[dependencies] +anyhow.workspace = true +cc.workspace = true +dirs.workspace = true +fs4.workspace = true +indoc.workspace = true +libloading.workspace = true +once_cell.workspace = true +regex.workspace = true +serde.workspace = true +serde_json.workspace = true +tempfile.workspace = true -[dependencies.tree-sitter-tags] -version = "0.20" -path = "../../tags" +tree-sitter.workspace = true +tree-sitter-highlight.workspace = true +tree-sitter-tags.workspace = true diff --git a/cli/loader/README.md b/cli/loader/README.md index 9889ec7..a3c1867 100644 --- a/cli/loader/README.md +++ b/cli/loader/README.md @@ -1,6 +1,6 @@ -# `tree-sitter-loader` +# Tree-sitter Loader The `tree-sitter` command-line program will dynamically find and build grammars at runtime, if you have cloned the grammars' repositories to your local -filesystem. This helper crate implements that logic, so that you can use it in +filesystem. This helper crate implements that logic, so that you can use it in your own program analysis tools, as well. 
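Before the loader hunks that follow, a rough sketch of the runtime flow this crate provides, using the APIs visible in this patch (`Loader::new`, `Config::initial`, `find_all_languages`, `language_configuration_for_file_name`); the `language_for_file` helper itself is hypothetical:

    use std::path::Path;

    use anyhow::Result;
    use tree_sitter::Language;
    use tree_sitter_loader::{Config, Loader};

    // Hypothetical helper: scan the configured parser directories for
    // tree-sitter-* checkouts, then resolve a grammar for one file by its
    // file type, compiling and caching the parser library on first use.
    fn language_for_file(path: &Path) -> Result<Option<Language>> {
        let mut loader = Loader::new()?;
        loader.find_all_languages(&Config::initial())?;
        Ok(loader
            .language_configuration_for_file_name(path)?
            .map(|(language, _config)| language))
    }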
diff --git a/cli/loader/build.rs b/cli/loader/build.rs index e0ebd1c..b01f01b 100644 --- a/cli/loader/build.rs +++ b/cli/loader/build.rs @@ -3,4 +3,11 @@ fn main() { "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); + println!( + "cargo:rustc-env=BUILD_HOST={}", + std::env::var("HOST").unwrap() + ); + + let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap(); + println!("cargo:rustc-env=EMSCRIPTEN_VERSION={emscripten_version}"); } diff --git a/cli/loader/emscripten-version b/cli/loader/emscripten-version new file mode 100644 index 0000000..5b1840f --- /dev/null +++ b/cli/loader/emscripten-version @@ -0,0 +1 @@ +3.1.55 diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs index 029da45..3371e1f 100644 --- a/cli/loader/src/lib.rs +++ b/cli/loader/src/lib.rs @@ -1,20 +1,32 @@ +#![doc = include_str!("../README.md")] + +use std::{ + collections::HashMap, + env, + ffi::{OsStr, OsString}, + fs, + io::{BufRead, BufReader}, + mem, + ops::Range, + path::{Path, PathBuf}, + process::Command, + sync::Mutex, + time::SystemTime, +}; + use anyhow::{anyhow, Context, Error, Result}; +use fs4::FileExt; +use indoc::indoc; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; use regex::{Regex, RegexBuilder}; use serde::{Deserialize, Deserializer, Serialize}; -use std::collections::HashMap; -use std::io::BufReader; -use std::ops::Range; -use std::path::{Path, PathBuf}; -use std::process::Command; -use std::sync::Mutex; -use std::time::SystemTime; -use std::{env, fs, mem}; use tree_sitter::{Language, QueryError, QueryErrorKind}; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; +pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION")); + #[derive(Default, Deserialize, Serialize)] pub struct Config { #[serde(default)] @@ -33,9 +45,8 @@ where D: Deserializer<'de>, { let paths = Vec::<String>::deserialize(deserializer)?; - let home = match dirs::home_dir() { - Some(home) => home, - None => return Ok(paths), + let Some(home) = dirs::home_dir() else { + return Ok(paths); }; let standardized = paths .into_iter() @@ -55,30 +66,29 @@ fn standardize_path(path: PathBuf, home: &Path) -> PathBuf { } impl Config { - pub fn initial() -> Config { + #[must_use] + pub fn initial() -> Self { let home_dir = dirs::home_dir().expect("Cannot determine home directory"); - Config { + Self { parser_directories: vec![ home_dir.join("github"), home_dir.join("src"), home_dir.join("source"), + home_dir.join("projects"), + home_dir.join("dev"), + home_dir.join("git"), ], } } } -#[cfg(unix)] -const DYLIB_EXTENSION: &'static str = "so"; - -#[cfg(windows)] -const DYLIB_EXTENSION: &'static str = "dll"; - -const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); +const BUILD_TARGET: &str = env!("BUILD_TARGET"); +const BUILD_HOST: &str = env!("BUILD_HOST"); pub struct LanguageConfiguration<'a> { pub scope: Option<String>, pub content_regex: Option<Regex>, - pub _first_line_regex: Option<Regex>, + pub first_line_regex: Option<Regex>, pub injection_regex: Option<Regex>, pub file_types: Vec<String>, pub root_path: PathBuf, @@ -86,6 +96,7 @@ pub struct LanguageConfiguration<'a> { pub injections_filenames: Option<Vec<String>>, pub locals_filenames: Option<Vec<String>>, pub tags_filenames: Option<Vec<String>>, + pub language_name: String, language_id: usize, highlight_config: OnceCell<Option<HighlightConfiguration>>, tags_config: OnceCell<Option<TagsConfiguration>>, @@ -94,13 +105,49 @@ pub struct LanguageConfiguration<'a> { } pub struct Loader { - parser_lib_path: PathBuf, - languages_by_id: Vec<(PathBuf, OnceCell<Language>)>, + pub
parser_lib_path: PathBuf, + languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>, language_configurations: Vec<LanguageConfiguration<'static>>, language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>, + language_configuration_in_current_path: Option<usize>, + language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>, highlight_names: Box<Mutex<Vec<String>>>, use_all_highlight_names: bool, debug_build: bool, + + #[cfg(feature = "wasm")] + wasm_store: Mutex<Option<tree_sitter::WasmStore>>, +} + +pub struct CompileConfig<'a> { + pub src_path: &'a Path, + pub header_paths: Vec<&'a Path>, + pub parser_path: PathBuf, + pub scanner_path: Option<PathBuf>, + pub external_files: Option<&'a [PathBuf]>, + pub output_path: Option<PathBuf>, + pub flags: &'a [&'a str], + pub name: String, +} + +impl<'a> CompileConfig<'a> { + #[must_use] + pub fn new( + src_path: &'a Path, + externals: Option<&'a [PathBuf]>, + output_path: Option<PathBuf>, + ) -> Self { + Self { + src_path, + header_paths: vec![src_path], + parser_path: src_path.join("parser.c"), + scanner_path: None, + external_files: externals, + output_path, + flags: &[], + name: String::new(), + } + } } unsafe impl Send for Loader {} @@ -111,32 +158,39 @@ impl Loader { let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") { Ok(path) => PathBuf::from(path), _ => dirs::cache_dir() - .ok_or(anyhow!("Cannot determine cache directory"))? + .ok_or_else(|| anyhow!("Cannot determine cache directory"))? .join("tree-sitter") .join("lib"), }; Ok(Self::with_parser_lib_path(parser_lib_path)) } + #[must_use] pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self { - Loader { + Self { parser_lib_path, languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + language_configuration_in_current_path: None, + language_configuration_ids_by_first_line_regex: HashMap::new(), highlight_names: Box::new(Mutex::new(Vec::new())), use_all_highlight_names: true, debug_build: false, + + #[cfg(feature = "wasm")] + wasm_store: Mutex::default(), } } - pub fn configure_highlights(&mut self, names: &Vec<String>) { + pub fn configure_highlights(&mut self, names: &[String]) { self.use_all_highlight_names = false; let mut highlights = self.highlight_names.lock().unwrap(); highlights.clear(); highlights.extend(names.iter().cloned()); } + #[must_use] pub fn highlight_names(&self) -> Vec<String> { self.highlight_names.lock().unwrap().clone() } @@ -146,8 +200,7 @@ impl Loader { eprintln!("Warning: You have not configured any parser directories!"); eprintln!("Please run `tree-sitter init-config` and edit the resulting"); eprintln!("configuration file to indicate where we should look for"); - eprintln!("language grammars."); - eprintln!(""); + eprintln!("language grammars.\n"); } for parser_container_dir in &config.parser_directories { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -157,6 +210,7 @@ if parser_dir_name.starts_with("tree-sitter-") { self.find_language_configurations_at_path( &parser_container_dir.join(parser_dir_name), + false, ) .ok(); } @@ -167,23 +221,24 @@ Ok(()) } - pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<Language>> { - if let Ok(configurations) = self.find_language_configurations_at_path(path) { + pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<(Language, String)>> { + if let Ok(configurations) = self.find_language_configurations_at_path(path, true) { let mut language_ids = configurations .iter() - .map(|c| c.language_id) + .map(|c| (c.language_id, c.language_name.clone())) .collect::<Vec<_>>(); - language_ids.sort(); + language_ids.sort_unstable(); language_ids.dedup(); language_ids .into_iter() -
.map(|id| self.language_for_id(id)) + .map(|(id, name)| Ok((self.language_for_id(id)?, name))) .collect::<Result<Vec<_>>>() } else { Ok(Vec::new()) } } + #[must_use] pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> { self.language_configurations .iter() @@ -204,6 +259,30 @@ Ok(None) } + pub fn language_configuration_for_first_line_regex( + &self, + path: &Path, + ) -> Result<Option<(Language, &LanguageConfiguration)>> { + self.language_configuration_ids_by_first_line_regex + .iter() + .try_fold(None, |_, (regex, ids)| { + if let Some(regex) = Self::regex(Some(regex)) { + let file = fs::File::open(path)?; + let reader = BufReader::new(file); + let first_line = reader.lines().next().transpose()?; + if let Some(first_line) = first_line { + if regex.is_match(&first_line) && !ids.is_empty() { + let configuration = &self.language_configurations[ids[0]]; + let language = self.language_for_id(configuration.language_id)?; + return Ok(Some((language, configuration))); + } + } + } + + Ok(None) + }) + } + pub fn language_configuration_for_file_name( &self, path: &Path, @@ -224,17 +303,14 @@ if let Some(configuration_ids) = configuration_ids { if !configuration_ids.is_empty() { - let configuration; - - // If there is only one language configuration, then use it. - if configuration_ids.len() == 1 { - configuration = &self.language_configurations[configuration_ids[0]]; + let configuration = if configuration_ids.len() == 1 { + &self.language_configurations[configuration_ids[0]] } // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. else { - let file_contents = fs::read(path) - .with_context(|| format!("Failed to read path {:?}", path))?; + let file_contents = + fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?; let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; @@ -264,8 +340,8 @@ } } - configuration = &self.language_configurations[best_configuration_id.unwrap()]; - } + &self.language_configurations[best_configuration_id.unwrap()] + }; let language = self.language_for_id(configuration.language_id)?; return Ok(Some((language, configuration))); @@ -303,19 +379,33 @@ } fn language_for_id(&self, id: usize) -> Result<Language> { - let (path, language) = &self.languages_by_id[id]; + let (path, language, externals) = &self.languages_by_id[id]; language .get_or_try_init(|| { let src_path = path.join("src"); - self.load_language_at_path(&src_path, &src_path) + self.load_language_at_path(CompileConfig::new( + &src_path, + externals.as_deref(), + None, + )) }) - .map(|l| *l) + .cloned() + } + + pub fn compile_parser_at_path( + &self, + grammar_path: &Path, + output_path: PathBuf, + flags: &[&str], + ) -> Result<()> { + let src_path = grammar_path.join("src"); + let mut config = CompileConfig::new(&src_path, None, Some(output_path)); + config.flags = flags; + self.load_language_at_path(config).map(|_| ()) } - pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result<Language> { - let grammar_path = src_path.join("grammar.json"); - let parser_path = src_path.join("parser.c"); - let mut scanner_path = src_path.join("scanner.c"); + pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> { + let grammar_path = config.src_path.join("grammar.json"); #[derive(Deserialize)] struct GrammarJSON { @@ -326,178 +416,505 @@ impl Loader { let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file)) .with_context(|| "Failed to parse grammar.json")?; - let scanner_path = if scanner_path.exists() { - Some(scanner_path) - } else { - scanner_path.set_extension("cc"); - if scanner_path.exists() { - Some(scanner_path) - } else { - None - } - }; + config.name = grammar_json.name; - self.load_language_from_sources( - &grammar_json.name, - &header_path, - &parser_path, - &scanner_path, - ) + self.load_language_at_path_with_name(config) } - pub fn load_language_from_sources( - &self, - name: &str, - header_path: &Path, - parser_path: &Path, - scanner_path: &Option, - ) -> Result { - let mut lib_name = name.to_string(); + pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result { + let mut lib_name = config.name.to_string(); + let language_fn_name = format!( + "tree_sitter_{}", + replace_dashes_with_underscores(&config.name) + ); if self.debug_build { lib_name.push_str(".debug._"); } - let mut library_path = self.parser_lib_path.join(lib_name); - library_path.set_extension(DYLIB_EXTENSION); - - let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) - .with_context(|| "Failed to compare source and binary timestamps")?; - if recompile { + if config.output_path.is_none() { fs::create_dir_all(&self.parser_lib_path)?; - let mut config = cc::Build::new(); + } + + let mut recompile = config.output_path.is_some(); // if specified, always recompile + + let output_path = config.output_path.unwrap_or_else(|| { + let mut path = self.parser_lib_path.join(lib_name); + path.set_extension(env::consts::DLL_EXTENSION); + #[cfg(feature = "wasm")] + if self.wasm_store.lock().unwrap().is_some() { + path.set_extension("wasm"); + } + path + }); + config.output_path = Some(output_path.clone()); + + let parser_path = config.src_path.join("parser.c"); + config.scanner_path = self.get_scanner_path(config.src_path); + + let mut paths_to_check = vec![parser_path]; + + if let Some(scanner_path) = config.scanner_path.as_ref() { + paths_to_check.push(scanner_path.clone()); + } + + paths_to_check.extend( config - .cpp(true) - .opt_level(2) - .cargo_metadata(false) - .target(BUILD_TARGET) - .host(BUILD_TARGET); - let compiler = config.get_compiler(); - let mut command = Command::new(compiler.path()); - for (key, value) in compiler.env() { - command.env(key, value); + .external_files + .unwrap_or_default() + .iter() + .map(|p| config.src_path.join(p)), + ); + + if !recompile { + recompile = needs_recompile(&output_path, &paths_to_check) + .with_context(|| "Failed to compare source and binary timestamps")?; + } + + #[cfg(feature = "wasm")] + if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() { + if recompile { + self.compile_parser_to_wasm( + &config.name, + None, + config.src_path, + config + .scanner_path + .as_ref() + .and_then(|p| p.strip_prefix(config.src_path).ok()), + &output_path, + false, + )?; } - if cfg!(windows) { - command.args(&["/nologo", "/LD", "/I"]).arg(header_path); - if self.debug_build { - command.arg("/Od"); - } else { - command.arg("/O2"); - } - command.arg(parser_path); - if let Some(scanner_path) = scanner_path.as_ref() { - command.arg(scanner_path); - } - command - .arg("/link") - .arg(format!("/out:{}", library_path.to_str().unwrap())); - } else { - command - .arg("-shared") - .arg("-fPIC") - .arg("-fno-exceptions") - .arg("-g") - .arg("-I") - .arg(header_path) - .arg("-o") - .arg(&library_path); - - if self.debug_build { - command.arg("-O0"); - } else { - command.arg("-O2"); - } + 
let wasm_bytes = fs::read(&output_path)?; + return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?); + } - let lock_path = if env::var("CROSS_RUNNER").is_ok() { + tempfile::tempdir() + .unwrap() + .path() + .join("tree-sitter") + .join("lock") + .join(format!("{}.lock", config.name)) + } else { + dirs::cache_dir() + .ok_or_else(|| anyhow!("Cannot determine cache directory"))? + .join("tree-sitter") + .join("lock") + .join(format!("{}.lock", config.name)) + }; - if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) { + recompile = false; + if lock_file.try_lock_exclusive().is_err() { + // if we can't acquire the lock, another process is compiling the parser, wait for + // it and don't recompile + lock_file.lock_exclusive()?; + recompile = false; + } else { + // if we can acquire the lock, check if the lock file is older than 30 seconds, a + // run that was interrupted and left the lock file behind should not block + // subsequent runs + let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs(); + if time > 30 { + fs::remove_file(&lock_path)?; + recompile = true; } - } + } + } - let output = command - .output() - .with_context(|| "Failed to execute C compiler")?; - if !output.status.success() { - return Err(anyhow!( - "Parser compilation failed.\nStdout: {}\nStderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - )); + if recompile { + fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| { + format!( + "Failed to create directory {:?}", + lock_path.parent().unwrap() + ) + })?; + let lock_file = fs::OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&lock_path)?; + lock_file.lock_exclusive()?; + + self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?; + + if config.scanner_path.is_some() { + self.check_external_scanner(&config.name, &output_path)?; } } - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; - let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name)); + let library = unsafe { Library::new(&output_path) } - .with_context(|| format!("Error opening dynamic library {output_path:?}"))?; let language = unsafe { - let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library - .get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; + let language_fn = library + .get::<Symbol<unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {language_fn_name}"))?; language_fn() }; mem::forget(library); Ok(language) } + fn compile_parser_to_dylib( + &self, + config: &CompileConfig, + lock_file: &fs::File, + lock_path: &Path, + ) -> Result<(), Error> { + let mut cc_config = cc::Build::new(); + cc_config + .cargo_metadata(false) + .cargo_warnings(false) + .target(BUILD_TARGET) + .host(BUILD_HOST) + .file(&config.parser_path) + .includes(&config.header_paths); + + if let Some(scanner_path) = config.scanner_path.as_ref() { + if scanner_path.extension() != Some("c".as_ref()) { +
cc_config.cpp(true); + eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future."); + } else { + cc_config.std("c11"); + } + cc_config.file(scanner_path); + } + + if self.debug_build { + cc_config.opt_level(0).extra_warnings(true); + } else { + cc_config.opt_level(2).extra_warnings(false); + } + + for flag in config.flags { + cc_config.define(flag, None); + } + + let compiler = cc_config.get_compiler(); + let mut command = Command::new(compiler.path()); + command.args(compiler.args()); + for (key, value) in compiler.env() { + command.env(key, value); + } + + let output_path = config.output_path.as_ref().unwrap(); + + if compiler.is_like_msvc() { + let out = format!("-out:{}", output_path.to_str().unwrap()); + command.arg(if self.debug_build { "-LDd" } else { "-LD" }); + command.arg("-utf-8"); + command.args(cc_config.get_files()); + command.arg("-link").arg(out); + } else { + command.args(["-Werror=implicit-function-declaration", "-g"]); + if cfg!(any(target_os = "macos", target_os = "ios")) { + command.arg("-dynamiclib"); + // TODO: remove when supported + command.arg("-UTREE_SITTER_REUSE_ALLOCATOR"); + } else { + command.arg("-shared"); + } + command.args(cc_config.get_files()); + command.arg("-o").arg(output_path); + } + + let output = command.output().with_context(|| { + format!("Failed to execute the C compiler with the following command:\n{command:?}") + })?; + + lock_file.unlock()?; + fs::remove_file(lock_path)?; + + if output.status.success() { + Ok(()) + } else { + Err(anyhow!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )) + } + } + + #[cfg(unix)] + fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> { + let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) { + "_" + } else { + "" + }; + let mut must_have = vec![ + format!("{prefix}tree_sitter_{name}_external_scanner_create"), + format!("{prefix}tree_sitter_{name}_external_scanner_destroy"), + format!("{prefix}tree_sitter_{name}_external_scanner_serialize"), + format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"), + format!("{prefix}tree_sitter_{name}_external_scanner_scan"), + ]; + + let command = Command::new("nm") + .arg("-W") + .arg("-U") + .arg(library_path) + .output(); + if let Ok(output) = command { + if output.status.success() { + let mut found_non_static = false; + for line in String::from_utf8_lossy(&output.stdout).lines() { + if line.contains(" T ") { + if let Some(function_name) = + line.split_whitespace().collect::<Vec<_>>().get(2) + { + if !line.contains("tree_sitter_") { + if !found_non_static { + found_non_static = true; + eprintln!("Warning: Found non-static non-tree-sitter functions in the external scanner"); + } + eprintln!(" `{function_name}`"); + } else { + must_have.retain(|f| f != function_name); + } + } + } + } + if found_non_static { + eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name"); + } + + if !must_have.is_empty() { + let missing = must_have + .iter() + .map(|f| format!(" `{f}`")) + .collect::<Vec<_>>() + .join("\n"); + + return Err(anyhow!(format!( + indoc! {" + Missing required functions in the external scanner, parsing won't work without these!
+ + {} + + You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + "}, + missing, + ))); + } + } + } + + Ok(()) + } + + #[cfg(windows)] + fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> { + // TODO: there's no nm command on windows, whoever wants to implement this can and should :) + + // let mut must_have = vec![ + // format!("tree_sitter_{name}_external_scanner_create"), + // format!("tree_sitter_{name}_external_scanner_destroy"), + // format!("tree_sitter_{name}_external_scanner_serialize"), + // format!("tree_sitter_{name}_external_scanner_deserialize"), + // format!("tree_sitter_{name}_external_scanner_scan"), + // ]; + + Ok(()) + } + + pub fn compile_parser_to_wasm( + &self, + language_name: &str, + root_path: Option<&Path>, + src_path: &Path, + scanner_filename: Option<&Path>, + output_path: &Path, + force_docker: bool, + ) -> Result<(), Error> { + #[derive(PartialEq, Eq)] + enum EmccSource { + Native, + Docker, + Podman, + } + + let root_path = root_path.unwrap_or(src_path); + let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" }; + + // Order of preference: emscripten > docker > podman > error + let source = if !force_docker && Command::new(emcc_name).output().is_ok() { + EmccSource::Native + } else if Command::new("docker") + .arg("info") + .output() + .map_or(false, |out| out.status.success()) + { + EmccSource::Docker + } else if Command::new("podman") + .arg("--version") + .output() + .map_or(false, |out| out.status.success()) + { + EmccSource::Podman + } else { + return Err(anyhow!( + "You must have either emcc, docker, or podman on your PATH to run this command" + )); + }; + + let mut command = match source { + EmccSource::Native => { + let mut command = Command::new(emcc_name); + command.current_dir(src_path); + command + } + + EmccSource::Docker | EmccSource::Podman => { + let mut command = match source { + EmccSource::Docker => Command::new("docker"), + EmccSource::Podman => Command::new("podman"), + _ => unreachable!(), + }; + command.args(["run", "--rm"]); + + // The working directory is the directory containing the parser itself + let workdir = if root_path == src_path { + PathBuf::from("/src") + } else { + let mut path = PathBuf::from("/src"); + path.push(src_path.strip_prefix(root_path).unwrap()); + path + }; + command.args(["--workdir", &workdir.to_string_lossy()]); + + // Mount the root directory as a volume, which is the repo root + let mut volume_string = OsString::from(&root_path); + volume_string.push(":/src:Z"); + command.args([OsStr::new("--volume"), &volume_string]); + + // In case `docker` is an alias to `podman`, ensure that podman + // mounts the current directory as writable by the container + // user which has the same uid as the host user. Setting the + // podman-specific variable is more reliable than attempting to + // detect whether `docker` is an alias for `podman`. + // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode + command.env("PODMAN_USERNS", "keep-id"); + + // Get the current user id so that files created in the docker container will have + // the same owner. 
+ #[cfg(unix)] + { + #[link(name = "c")] + extern "C" { + fn getuid() -> u32; + } + // don't need to set user for podman since PODMAN_USERNS=keep-id is already set + if source == EmccSource::Docker { + let user_id = unsafe { getuid() }; + command.args(["--user", &user_id.to_string()]); + } + }; + + // Run `emcc` in a container using the `emscripten-slim` image + command.args([EMSCRIPTEN_TAG, "emcc"]); + command + } + }; + + let output_name = "output.wasm"; + + command.args([ + "-o", + output_name, + "-Os", + "-s", + "WASM=1", + "-s", + "SIDE_MODULE=2", + "-s", + "TOTAL_MEMORY=33554432", + "-s", + "NODEJS_CATCH_EXIT=0", + "-s", + &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"), + "-fno-exceptions", + "-fvisibility=hidden", + "-I", + ".", + ]); + + if let Some(scanner_filename) = scanner_filename { + if scanner_filename + .extension() + .and_then(|ext| ext.to_str()) + .map_or(false, |ext| ["cc", "cpp"].contains(&ext)) + { + eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future."); + command.arg("-xc++"); + } + command.arg(scanner_filename); + } + + command.arg("parser.c"); + let status = command + .spawn() + .with_context(|| "Failed to run emcc command")? + .wait()?; + if !status.success() { + return Err(anyhow!("emcc command failed")); + } + + fs::rename(src_path.join(output_name), output_path) + .context("failed to rename wasm output file")?; + + Ok(()) + } + + #[must_use] pub fn highlight_config_for_injection_string<'a>( &'a self, string: &str, ) -> Option<&'a HighlightConfiguration> { match self.language_configuration_for_injection_string(string) { Err(e) => { - eprintln!( - "Failed to load language for injection string '{}': {}", - string, e - ); + eprintln!("Failed to load language for injection string '{string}': {e}",); None } Ok(None) => None, - Ok(Some((language, configuration))) => match configuration.highlight_config(language) { - Err(e) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, e - ); - None + Ok(Some((language, configuration))) => { + match configuration.highlight_config(language, None) { + Err(e) => { + eprintln!( + "Failed to load property sheet for injection string '{string}': {e}", + ); + None + } + Ok(None) => None, + Ok(Some(config)) => Some(config), } - Ok(None) => None, - Ok(Some(config)) => Some(config), - }, + } } } - pub fn find_language_configurations_at_path<'a>( - &'a mut self, + pub fn find_language_configurations_at_path( + &mut self, parser_path: &Path, + set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Deserialize)] + #[derive(Deserialize, Clone, Default)] #[serde(untagged)] enum PathsJSON { + #[default] Empty, Single(String), Multiple(Vec), } - impl Default for PathsJSON { - fn default() -> Self { - PathsJSON::Empty - } - } - impl PathsJSON { fn into_vec(self) -> Option> { match self { - PathsJSON::Empty => None, - PathsJSON::Single(s) => Some(vec![s]), - PathsJSON::Multiple(s) => Some(s), + Self::Empty => None, + Self::Single(s) => Some(vec![s]), + Self::Multiple(s) => Some(s), } } } @@ -523,6 +940,8 @@ impl Loader { locals: PathsJSON, #[serde(default)] tags: PathsJSON, + #[serde(default, rename = "external-files")] + external_files: PathsJSON, } #[derive(Deserialize)] @@ -532,9 +951,14 @@ impl Loader { tree_sitter: Vec, } + #[derive(Deserialize)] + struct GrammarJSON { + name: String, + } + let initial_language_configuration_count = 
self.language_configurations.len(); - if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) { + if let Ok(package_json_contents) = fs::read_to_string(parser_path.join("package.json")) { let package_json = serde_json::from_str::(&package_json_contents); if let Ok(package_json) = package_json { let language_count = self.languages_by_id.len(); @@ -543,10 +967,17 @@ impl Loader { // the package.json, but defaults to the directory containing the package.json. let language_path = parser_path.join(config_json.path); + let grammar_path = language_path.join("src").join("grammar.json"); + let mut grammar_file = fs::File::open(grammar_path) + .with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; + // Determine if a previous language configuration in this package.json file // already uses the same language. let mut language_id = None; - for (id, (path, _)) in + for (id, (path, _, _)) in self.languages_by_id.iter().enumerate().skip(language_count) { if language_path == *path { @@ -555,38 +986,73 @@ impl Loader { } // If not, add a new language path to the list. - let language_id = language_id.unwrap_or_else(|| { - self.languages_by_id.push((language_path, OnceCell::new())); + let language_id = if let Some(language_id) = language_id { + language_id + } else { + self.languages_by_id.push(( + language_path, + OnceCell::new(), + config_json.external_files.clone().into_vec().map(|files| { + files.into_iter() + .map(|path| { + let path = parser_path.join(path); + // prevent p being above/outside of parser_path + if path.starts_with(parser_path) { + Ok(path) + } else { + Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}")) + } + }) + .collect::>>() + }).transpose()?, + )); self.languages_by_id.len() - 1 - }); + }; let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), + language_name: grammar_json.name.clone(), scope: config_json.scope, language_id, - file_types: config_json.file_types.unwrap_or(Vec::new()), - content_regex: Self::regex(config_json.content_regex), - _first_line_regex: Self::regex(config_json.first_line_regex), - injection_regex: Self::regex(config_json.injection_regex), + file_types: config_json.file_types.unwrap_or_default(), + content_regex: Self::regex(config_json.content_regex.as_deref()), + first_line_regex: Self::regex(config_json.first_line_regex.as_deref()), + injection_regex: Self::regex(config_json.injection_regex.as_deref()), injections_filenames: config_json.injections.into_vec(), locals_filenames: config_json.locals.into_vec(), tags_filenames: config_json.tags.into_vec(), highlights_filenames: config_json.highlights.into_vec(), highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { self.language_configuration_ids_by_file_type .entry(file_type.to_string()) - .or_insert(Vec::new()) + .or_default() + .push(self.language_configurations.len()); + } + if let Some(first_line_regex) = &configuration.first_line_regex { + self.language_configuration_ids_by_first_line_regex + .entry(first_line_regex.to_string()) + .or_default() .push(self.language_configurations.len()); } - self.language_configurations - .push(unsafe { mem::transmute(configuration) }); + 
self.language_configurations.push(unsafe { + mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>( + configuration, + ) + }); + + if set_current_path_config + && self.language_configuration_in_current_path.is_none() + { + self.language_configuration_in_current_path = + Some(self.language_configurations.len() - 1); + } } } } @@ -594,13 +1060,20 @@ if self.language_configurations.len() == initial_language_configuration_count && parser_path.join("src").join("grammar.json").exists() { + let grammar_path = parser_path.join("src").join("grammar.json"); + let mut grammar_file = + fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; let configuration = LanguageConfiguration { root_path: parser_path.to_owned(), + language_name: grammar_json.name, language_id: self.languages_by_id.len(), file_types: Vec::new(), scope: None, content_regex: None, - _first_line_regex: None, + first_line_regex: None, injection_regex: None, injections_filenames: None, locals_filenames: None, @@ -608,20 +1081,23 @@ tags_filenames: None, highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; - self.language_configurations - .push(unsafe { mem::transmute(configuration) }); + self.language_configurations.push(unsafe { + mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>( + configuration, + ) + }); self.languages_by_id - .push((parser_path.to_owned(), OnceCell::new())); + .push((parser_path.to_owned(), OnceCell::new(), None)); } Ok(&self.language_configurations[initial_language_configuration_count..]) } - fn regex(pattern: Option<String>) -> Option<Regex> { - pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()) + fn regex(pattern: Option<&str>) -> Option<Regex> { + pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok()) } pub fn select_language( @@ -633,11 +1109,11 @@ if let Some(scope) = scope { if let Some(config) = self .language_configuration_for_scope(scope) - .with_context(|| format!("Failed to load language for scope '{}'", scope))? + .with_context(|| format!("Failed to load language for scope '{scope}'"))? { Ok(config.0) } else { - return Err(anyhow!("Unknown scope '{}'", scope)); + Err(anyhow!("Unknown scope '{scope}'")) } } else if let Some((lang, _)) = self .language_configuration_for_file_name(path) @@ -649,13 +1125,17 @@ })? { Ok(lang) + } else if let Some(id) = self.language_configuration_in_current_path { + Ok(self.language_for_id(self.language_configurations[id].language_id)?) } else if let Some(lang) = self - .languages_at_path(&current_dir) + .languages_at_path(current_dir) .with_context(|| "Failed to load language in current directory")? .first() .cloned() { - Ok(lang) + Ok(lang.0) + } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)?
{ + Ok(lang.0) } else { Err(anyhow!("No language found")) } @@ -664,25 +1144,90 @@ impl Loader { pub fn use_debug_build(&mut self, flag: bool) { self.debug_build = flag; } + + #[cfg(feature = "wasm")] + pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) { + *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap()); + } + + #[must_use] + pub fn get_scanner_path(&self, src_path: &Path) -> Option { + let mut path = src_path.join("scanner.c"); + for extension in ["c", "cc", "cpp"] { + path.set_extension(extension); + if path.exists() { + return Some(path); + } + } + None + } } impl<'a> LanguageConfiguration<'a> { - pub fn highlight_config(&self, language: Language) -> Result> { - return self - .highlight_config + pub fn highlight_config( + &self, + language: Language, + paths: Option<&[String]>, + ) -> Result> { + let (highlights_filenames, injections_filenames, locals_filenames) = match paths { + Some(paths) => ( + Some( + paths + .iter() + .filter(|p| p.ends_with("highlights.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("tags.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("locals.scm")) + .cloned() + .collect::>(), + ), + ), + None => (None, None, None), + }; + self.highlight_config .get_or_try_init(|| { - let (highlights_query, highlight_ranges) = - self.read_queries(&self.highlights_filenames, "highlights.scm")?; - let (injections_query, injection_ranges) = - self.read_queries(&self.injections_filenames, "injections.scm")?; - let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + let (highlights_query, highlight_ranges) = self.read_queries( + if highlights_filenames.is_some() { + highlights_filenames.as_deref() + } else { + self.highlights_filenames.as_deref() + }, + "highlights.scm", + )?; + let (injections_query, injection_ranges) = self.read_queries( + if injections_filenames.is_some() { + injections_filenames.as_deref() + } else { + self.injections_filenames.as_deref() + }, + "injections.scm", + )?; + let (locals_query, locals_ranges) = self.read_queries( + if locals_filenames.is_some() { + locals_filenames.as_deref() + } else { + self.locals_filenames.as_deref() + }, + "locals.scm", + )?; if highlights_query.is_empty() { Ok(None) } else { let mut result = HighlightConfiguration::new( language, + &self.language_name, &highlights_query, &injections_query, &locals_query, @@ -717,25 +1262,26 @@ impl<'a> LanguageConfiguration<'a> { let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { - if !all_highlight_names.contains(capture_name) { - all_highlight_names.push(capture_name.clone()); + if !all_highlight_names.iter().any(|x| x == capture_name) { + all_highlight_names.push((*capture_name).to_string()); } } } - result.configure(&all_highlight_names.as_slice()); + result.configure(all_highlight_names.as_slice()); + drop(all_highlight_names); Ok(Some(result)) } }) - .map(Option::as_ref); + .map(Option::as_ref) } pub fn tags_config(&self, language: Language) -> Result> { self.tags_config .get_or_try_init(|| { let (tags_query, tags_ranges) = - self.read_queries(&self.tags_filenames, "tags.scm")?; + self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?; let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?; 
if tags_query.is_empty() { Ok(None) } else { @@ -758,7 +1304,6 @@ impl<'a> LanguageConfiguration<'a> { locals_query.len(), ) } - .into() } else { error.into() } @@ -768,9 +1313,9 @@ impl<'a> LanguageConfiguration<'a> { .map(Option::as_ref) } - fn include_path_in_query_error<'b>( + fn include_path_in_query_error( mut error: QueryError, - ranges: &'b Vec<(String, Range)>, + ranges: &[(String, Range)], source: &str, start_offset: usize, ) -> Error { @@ -778,36 +1323,46 @@ impl<'a> LanguageConfiguration<'a> { let (path, range) = ranges .iter() .find(|(_, range)| range.contains(&offset_within_section)) - .unwrap(); + .unwrap_or_else(|| ranges.last().unwrap()); error.offset = offset_within_section - range.start; error.row = source[range.start..offset_within_section] - .chars() - .filter(|c| *c == '\n') + .matches(|c| c == '\n') .count(); - Error::from(error).context(format!("Error in query file {:?}", path)) + Error::from(error).context(format!("Error in query file {path:?}")) } + #[allow(clippy::type_complexity)] fn read_queries( &self, - paths: &Option>, + paths: Option<&[String]>, default_path: &str, ) -> Result<(String, Vec<(String, Range)>)> { let mut query = String::new(); let mut path_ranges = Vec::new(); - if let Some(paths) = paths.as_ref() { + if let Some(paths) = paths { for path in paths { let abs_path = self.root_path.join(path); let prev_query_len = query.len(); query += &fs::read_to_string(&abs_path) - .with_context(|| format!("Failed to read query file {:?}", path))?; + .with_context(|| format!("Failed to read query file {path:?}"))?; path_ranges.push((path.clone(), prev_query_len..query.len())); } } else { + // highlights.scm is needed to test highlights, and tags.scm to test tags + if default_path == "highlights.scm" || default_path == "tags.scm" { + eprintln!( + indoc! {" + Warning: you should add a `{}` entry pointing to the highlights path in `tree-sitter` language list in the grammar's package.json + See more here: https://tree-sitter.github.io/tree-sitter/syntax-highlighting#query-paths + "}, + default_path.replace(".scm", "") + ); + } let queries_path = self.root_path.join("queries"); let path = queries_path.join(default_path); if path.exists() { query = fs::read_to_string(&path) - .with_context(|| format!("Failed to read query file {:?}", path))?; + .with_context(|| format!("Failed to read query file {path:?}"))?; path_ranges.push((default_path.to_string(), 0..query.len())); } } @@ -816,20 +1371,14 @@ impl<'a> LanguageConfiguration<'a> { } } -fn needs_recompile( - lib_path: &Path, - parser_c_path: &Path, - scanner_path: &Option, -) -> Result { +fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result { if !lib_path.exists() { return Ok(true); } - let lib_mtime = mtime(lib_path)?; - if mtime(parser_c_path)? > lib_mtime { - return Ok(true); - } - if let Some(scanner_path) = scanner_path { - if mtime(scanner_path)? > lib_mtime { + let lib_mtime = + mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?; + for path in paths_to_check { + if mtime(path)? 
> lib_mtime { return Ok(true); } } diff --git a/cli/npm/.gitignore b/cli/npm/.gitignore index 942b33a..65e04cf 100644 --- a/cli/npm/.gitignore +++ b/cli/npm/.gitignore @@ -3,3 +3,4 @@ tree-sitter.exe *.gz *.tgz LICENSE +README.md diff --git a/cli/npm/dsl.d.ts b/cli/npm/dsl.d.ts index f2ee57f..63f9ed4 100644 --- a/cli/npm/dsl.d.ts +++ b/cli/npm/dsl.d.ts @@ -1,19 +1,19 @@ -type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; -type BlankRule = {type: 'BLANK'}; -type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; -type FieldRule = {type: 'FIELD'; name: string; content: Rule}; -type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; -type PatternRule = {type: 'PATTERN'; value: string}; -type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; -type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; -type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; -type PrecRule = {type: 'PREC'; content: Rule; value: number}; -type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; -type RepeatRule = {type: 'REPEAT'; content: Rule}; -type SeqRule = {type: 'SEQ'; members: Rule[]}; -type StringRule = {type: 'STRING'; value: string}; -type SymbolRule = {type: 'SYMBOL'; name: Name}; -type TokenRule = {type: 'TOKEN'; content: Rule}; +type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; +type BlankRule = { type: 'BLANK' }; +type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; +type FieldRule = { type: 'FIELD'; name: string; content: Rule }; +type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; +type PatternRule = { type: 'PATTERN'; value: string }; +type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; +type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; +type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; +type PrecRule = { type: 'PREC'; content: Rule; value: number }; +type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; +type RepeatRule = { type: 'REPEAT'; content: Rule }; +type SeqRule = { type: 'SEQ'; members: Rule[] }; +type StringRule = { type: 'STRING'; value: string }; +type SymbolRule = { type: 'SYMBOL'; name: Name }; +type TokenRule = { type: 'TOKEN'; content: Rule }; type Rule = | AliasRule @@ -42,14 +42,15 @@ type GrammarSymbols = { type RuleBuilder = ( $: GrammarSymbols, + previous: Rule, ) => RuleOrLiteral; type RuleBuilders< RuleName extends string, BaseGrammarRuleName extends string > = { - [name in RuleName]: RuleBuilder; -}; + [name in RuleName]: RuleBuilder; + }; interface Grammar< RuleName extends string, @@ -68,11 +69,17 @@ interface Grammar< rules: Rules; /** - * An array of arrays of precedence names. Each inner array represents - * a *descending* ordering. Names listed earlier in one of these arrays - * have higher precedence than any names listed later in the same array. + * An array of arrays of precedence names or rules. Each inner array represents + * a *descending* ordering. Names/rules listed earlier in one of these arrays + * have higher precedence than any names/rules listed later in the same array. + * + * Using rules is just a shorthand way for using a name then calling prec() + * with that name. It is just a convenience. */ - precedences?: () => String[][], + precedences?: ( + $: GrammarSymbols, + previous: Rule[][], + ) => RuleOrLiteral[][], /** * An array of arrays of rule names. 
Each inner array represents a set of @@ -86,6 +93,7 @@ interface Grammar< */ conflicts?: ( $: GrammarSymbols, + previous: Rule[][], ) => RuleOrLiteral[][]; /** @@ -102,7 +110,7 @@ interface Grammar< externals?: ( $: Record>, previous: Rule[], - ) => SymbolRule[]; + ) => RuleOrLiteral[]; /** * An array of tokens that may appear anywhere in the language. This @@ -126,6 +134,7 @@ interface Grammar< */ inline?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -134,10 +143,11 @@ interface Grammar< * * @param $ grammar rules * - * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types */ supertypes?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -153,8 +163,8 @@ interface Grammar< type GrammarSchema = { [K in keyof Grammar]: K extends 'rules' - ? Record - : Grammar[K]; + ? Record + : Grammar[K]; }; /** diff --git a/cli/npm/install.js b/cli/npm/install.js index 2790b47..c18ae6b 100755 --- a/cli/npm/install.js +++ b/cli/npm/install.js @@ -6,25 +6,43 @@ const http = require('http'); const https = require('https'); const packageJSON = require('./package.json'); -// Determine the URL of the file. -const platformName = { - 'darwin': 'macos', - 'linux': 'linux', - 'win32': 'windows' -}[process.platform]; - -let archName = { - 'x64': 'x64', - 'x86': 'x86', - 'ia32': 'x86' -}[process.arch]; - -// ARM macs can run x64 binaries via Rosetta. Rely on that for now. -if (platformName === 'macos' && process.arch === 'arm64') { - archName = 'x64'; +// Look to a results table in https://github.com/tree-sitter/tree-sitter/issues/2196 +const matrix = { + platform: { + 'darwin': { + name: 'macos', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + } + }, + 'linux': { + name: 'linux', + arch: { + 'arm64': { name: 'arm64' }, + 'arm': { name: 'arm' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'ppc64': { name: 'powerpc64' }, + } + }, + 'win32': { + name: 'windows', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'ia32': { name: 'x86' }, + } + }, + }, } -if (!platformName || !archName) { +// Determine the URL of the file. +const platform = matrix.platform[process.platform]; +const arch = platform?.arch[process.arch]; + +if (!platform || !platform.name || !arch || !arch.name) { console.error( `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}` ); @@ -32,7 +50,7 @@ if (!platformName || !archName) { } const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`; -const assetName = `tree-sitter-${platformName}-${archName}.gz`; +const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`; const assetURL = `${releaseURL}/${assetName}`; // Remove previously-downloaded files. @@ -65,29 +83,42 @@ file.on('finish', () => { // Follow redirects. function get(url, callback) { - const requestUrl = new URL(url) - let request = https - let requestConfig = requestUrl - const proxyEnv = process.env['HTTPS_PROXY'] || process.env['https_proxy'] - - if (proxyEnv) { - const proxyUrl = new URL(proxyEnv) - request = proxyUrl.protocol === 'https:' ? 
https : http - requestConfig = { - hostname: proxyUrl.hostname, - port: proxyUrl.port, - path: requestUrl.toString(), - headers: { - Host: requestUrl.hostname - } - } - } - - request.get(requestConfig, response => { + const processResponse = (response) => { if (response.statusCode === 301 || response.statusCode === 302) { get(response.headers.location, callback); } else { callback(response); } - }); + }; + + const proxyEnv = process.env.HTTPS_PROXY || process.env.https_proxy; + if (!proxyEnv) { + https.get(url, processResponse); + return; + } + + const requestUrl = new URL(url); + const requestPort = requestUrl.port || (requestUrl.protocol === 'https:' ? 443 : 80); + const proxyUrl = new URL(proxyEnv); + const request = proxyUrl.protocol === 'https:' ? https : http; + request.request({ + host: proxyUrl.hostname, + port: proxyUrl.port || (proxyUrl.protocol === 'https:' ? 443 : 80), + method: 'CONNECT', + path: `${requestUrl.hostname}:${requestPort}`, + }).on('connect', (response, socket, _head) => { + if (response.statusCode !== 200) { + // let caller handle error + callback(response); + return; + } + + const agent = https.Agent({ socket }); + https.get({ + host: requestUrl.host, + port: requestPort, + path: `${requestUrl.pathname}${requestUrl.search}`, + agent, + }, processResponse); + }).end(); } diff --git a/cli/npm/package.json b/cli/npm/package.json index 0230919..b072c89 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,11 +1,11 @@ { "name": "tree-sitter-cli", - "version": "0.20.8", + "version": "0.22.6", "author": "Max Brunsfeld", "license": "MIT", "repository": { "type": "git", - "url": "http://github.com/tree-sitter/tree-sitter.git" + "url": "https://github.com/tree-sitter/tree-sitter.git" }, "description": "CLI for generating fast incremental parsers", "keywords": [ @@ -15,7 +15,8 @@ "main": "lib/api/index.js", "scripts": { "install": "node install.js", - "prepack": "cp ../../LICENSE ." 
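The rewritten `get()` above stops funneling the whole request URL through the proxy as a plain GET; instead it opens an HTTP CONNECT tunnel and issues the real HTTPS request over the socket handed back by the `'connect'` event. A condensed sketch of that flow — it assumes an `http://` proxy and hard-codes port 443 where install.js derives both from the URLs, and passing `socket` to `https.Agent` mirrors what the patch itself does:

```ts
import * as http from "http";
import * as https from "https";

function getViaProxy(
  proxyUrl: URL,
  requestUrl: URL,
  onResponse: (res: http.IncomingMessage) => void,
): void {
  http
    .request({
      host: proxyUrl.hostname,
      port: proxyUrl.port || 80,
      method: "CONNECT",                  // ask the proxy for a raw tunnel
      path: `${requestUrl.hostname}:443`, // tunnel endpoint, not a URL path
    })
    .on("connect", (res, socket) => {
      if (res.statusCode !== 200) return; // tunnel refused; caller handles it
      // Run the real TLS request over the tunneled socket.
      const agent = new https.Agent({ socket } as https.AgentOptions);
      https.get(
        {
          host: requestUrl.host,
          path: `${requestUrl.pathname}${requestUrl.search}`,
          agent,
        },
        onResponse,
      );
    })
    .end();
}
```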
+ "prepack": "cp ../../LICENSE ../README.md .", + "postpack": "rm LICENSE README.md" }, "bin": { "tree-sitter": "cli.js" diff --git a/cli/src/generate/binding_files.rs b/cli/src/generate/binding_files.rs deleted file mode 100644 index 4241b61..0000000 --- a/cli/src/generate/binding_files.rs +++ /dev/null @@ -1,154 +0,0 @@ -use super::write_file; -use anyhow::{Context, Result}; -use std::path::{Path, PathBuf}; -use std::{fs, str}; - -const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc"); -const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp"); -const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js"); -const LIB_RS_TEMPLATE: &'static str = include_str!("./templates/lib.rs"); -const BUILD_RS_TEMPLATE: &'static str = include_str!("./templates/build.rs"); -const CARGO_TOML_TEMPLATE: &'static str = include_str!("./templates/cargo.toml"); -const PACKAGE_JSON_TEMPLATE: &'static str = include_str!("./templates/package.json"); -const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME"; -const CLI_VERSION_PLACEHOLDER: &'static str = "CLI_VERSION"; -const CLI_VERSION: &'static str = env!("CARGO_PKG_VERSION"); -const RUST_BINDING_VERSION: &'static str = env!("RUST_BINDING_VERSION"); -const RUST_BINDING_VERSION_PLACEHOLDER: &'static str = "RUST_BINDING_VERSION"; - -pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<()> { - let bindings_dir = repo_path.join("bindings"); - - let dashed_language_name = language_name.replace("_", "-"); - let dashed_language_name = dashed_language_name.as_str(); - - // Generate rust bindings if needed. - let rust_binding_dir = bindings_dir.join("rust"); - create_path(&rust_binding_dir, |path| create_dir(path))?; - - create_path(&rust_binding_dir.join("lib.rs").to_owned(), |path| { - generate_file(path, LIB_RS_TEMPLATE, language_name) - })?; - - create_path(&rust_binding_dir.join("build.rs").to_owned(), |path| { - generate_file(path, BUILD_RS_TEMPLATE, language_name) - })?; - - create_path(&repo_path.join("Cargo.toml").to_owned(), |path| { - generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name) - })?; - - // Generate node bindings - let node_binding_dir = bindings_dir.join("node"); - create_path(&node_binding_dir, |path| create_dir(path))?; - - create_path(&node_binding_dir.join("index.js").to_owned(), |path| { - generate_file(path, INDEX_JS_TEMPLATE, language_name) - })?; - - create_path(&node_binding_dir.join("binding.cc").to_owned(), |path| { - generate_file(path, BINDING_CC_TEMPLATE, language_name) - })?; - - // Create binding.gyp, or update it with new binding path. - let binding_gyp_path = repo_path.join("binding.gyp"); - create_path_else( - &binding_gyp_path, - |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name), - |path| { - let binding_gyp = - fs::read_to_string(path).with_context(|| "Failed to read binding.gyp")?; - let old_path = "\"src/binding.cc\""; - if binding_gyp.contains(old_path) { - eprintln!("Updating binding.gyp with new binding path"); - let binding_gyp = binding_gyp.replace(old_path, "\"bindings/node/binding.cc\""); - write_file(path, binding_gyp)?; - } - Ok(()) - }, - )?; - - // Create package.json, or update it with new binding path. 
- let package_json_path = repo_path.join("package.json"); - create_path_else( - &package_json_path, - |path| generate_file(path, PACKAGE_JSON_TEMPLATE, dashed_language_name), - |path| { - let package_json_str = - fs::read_to_string(path).with_context(|| "Failed to read package.json")?; - let mut package_json = - serde_json::from_str::>( - &package_json_str, - ) - .with_context(|| "Failed to parse package.json")?; - let package_json_main = package_json.get("main"); - let package_json_needs_update = package_json_main.map_or(true, |v| { - let main_string = v.as_str(); - main_string == Some("index.js") || main_string == Some("./index.js") - }); - if package_json_needs_update { - eprintln!("Updating package.json with new binding path"); - package_json.insert( - "main".to_string(), - serde_json::Value::String("bindings/node".to_string()), - ); - let mut package_json_str = serde_json::to_string_pretty(&package_json)?; - package_json_str.push('\n'); - write_file(path, package_json_str)?; - } - Ok(()) - }, - )?; - - // Remove files from old node binding paths. - let old_index_js_path = repo_path.join("index.js"); - let old_binding_cc_path = repo_path.join("src").join("binding.cc"); - if old_index_js_path.exists() { - fs::remove_file(old_index_js_path).ok(); - } - if old_binding_cc_path.exists() { - fs::remove_file(old_binding_cc_path).ok(); - } - - Ok(()) -} - -fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> { - write_file( - path, - template - .replace(PARSER_NAME_PLACEHOLDER, language_name) - .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION) - .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION), - ) -} - -fn create_dir(path: &Path) -> Result<()> { - fs::create_dir_all(&path) - .with_context(|| format!("Failed to create {:?}", path.to_string_lossy())) -} - -fn create_path(path: &PathBuf, action: F) -> Result -where - F: Fn(&PathBuf) -> Result<()>, -{ - if !path.exists() { - action(path)?; - return Ok(true); - } - Ok(false) -} - -fn create_path_else(path: &PathBuf, action: T, else_action: F) -> Result -where - T: Fn(&PathBuf) -> Result<()>, - F: Fn(&PathBuf) -> Result<()>, -{ - if !path.exists() { - action(path)?; - return Ok(true); - } else { - else_action(path)?; - } - Ok(false) -} diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index d3ebb24..f7bff0d 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -1,40 +1,51 @@ -use super::coincident_tokens::CoincidentTokenIndex; -use super::token_conflicts::TokenConflictMap; -use crate::generate::dedup::split_state_id_groups; -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::NfaCursor; -use crate::generate::rules::{Symbol, TokenSet}; -use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}; +use std::{ + collections::{hash_map::Entry, HashMap, VecDeque}, + mem, +}; + use log::info; -use std::collections::hash_map::Entry; -use std::collections::{HashMap, VecDeque}; -use std::mem; -pub(crate) fn build_lex_table( +use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap}; +use crate::generate::{ + dedup::split_state_id_groups, + grammars::{LexicalGrammar, SyntaxGrammar}, + nfa::{CharacterSet, NfaCursor}, + rules::{Symbol, TokenSet}, + tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}, +}; + +pub const LARGE_CHARACTER_RANGE_COUNT: usize = 8; + +pub struct 
LexTables { + pub main_lex_table: LexTable, + pub keyword_lex_table: LexTable, + pub large_character_sets: Vec<(Option, CharacterSet)>, +} + +pub fn build_lex_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, keywords: &TokenSet, coincident_token_index: &CoincidentTokenIndex, token_conflict_map: &TokenConflictMap, -) -> (LexTable, LexTable) { - let keyword_lex_table; - if syntax_grammar.word_token.is_some() { +) -> LexTables { + let keyword_lex_table = if syntax_grammar.word_token.is_some() { let mut builder = LexTableBuilder::new(lexical_grammar); builder.add_state_for_tokens(keywords); - keyword_lex_table = builder.table; + builder.table } else { - keyword_lex_table = LexTable::default(); - } + LexTable::default() + }; - let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec)> = Vec::new(); + let mut parse_state_ids_by_token_set = Vec::<(TokenSet, Vec)>::new(); for (i, state) in parse_table.states.iter().enumerate() { let tokens = state .terminal_entries .keys() .filter_map(|token| { if token.is_terminal() { - if keywords.contains(&token) { + if keywords.contains(token) { syntax_grammar.word_token } else { Some(*token) @@ -48,7 +59,7 @@ pub(crate) fn build_lex_table( .collect(); let mut did_merge = false; - for entry in parse_state_ids_by_token_set.iter_mut() { + for entry in &mut parse_state_ids_by_token_set { if merge_token_set( &mut entry.0, &tokens, @@ -75,10 +86,45 @@ pub(crate) fn build_lex_table( } } - let mut table = builder.table; - minimize_lex_table(&mut table, parse_table); - sort_states(&mut table, parse_table); - (table, keyword_lex_table) + let mut main_lex_table = mem::take(&mut builder.table); + minimize_lex_table(&mut main_lex_table, parse_table); + sort_states(&mut main_lex_table, parse_table); + + let mut large_character_sets = Vec::new(); + for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() { + let symbol = Symbol::terminal(variable_ix); + builder.reset(); + builder.add_state_for_tokens(&TokenSet::from_iter([symbol])); + for state in &builder.table.states { + let mut characters = CharacterSet::empty(); + for (chars, action) in &state.advance_actions { + if action.in_main_token { + characters = characters.add(chars); + continue; + } + + if chars.range_count() > LARGE_CHARACTER_RANGE_COUNT + && !large_character_sets.iter().any(|(_, set)| set == chars) + { + large_character_sets.push((None, chars.clone())); + } + } + + if characters.range_count() > LARGE_CHARACTER_RANGE_COUNT + && !large_character_sets + .iter() + .any(|(_, set)| *set == characters) + { + large_character_sets.push((Some(symbol), characters)); + } + } + } + + LexTables { + main_lex_table, + keyword_lex_table, + large_character_sets, + } } struct QueueEntry { @@ -106,6 +152,12 @@ impl<'a> LexTableBuilder<'a> { } } + fn reset(&mut self) { + self.table = LexTable::default(); + self.state_queue.clear(); + self.state_ids_by_nfa_state_set.clear(); + } + fn add_state_for_tokens(&mut self, tokens: &TokenSet) -> usize { let mut eof_valid = false; let nfa_states = tokens @@ -198,7 +250,7 @@ impl<'a> LexTableBuilder<'a> { for transition in transitions { if let Some((completed_id, completed_precedence)) = completion { if !TokenConflictMap::prefer_transition( - &self.lexical_grammar, + self.lexical_grammar, &transition, completed_id, completed_precedence, @@ -248,12 +300,11 @@ fn merge_token_set( { return false; } - if !coincident_token_index.contains(symbol, existing_token) { - if token_conflict_map.does_overlap(existing_token.index, i) 
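A note on the `large_character_sets` collection being added above: once the main lex table is built and minimized, each token's lex states are rebuilt in isolation, and any advance-action character set spanning more than `LARGE_CHARACTER_RANGE_COUNT` (8) ranges is hoisted into a single deduplicated list, so the generated parser can share one copy of each big set instead of repeating it per state. The collection step as a sketch, modeling a character set as sorted `[start, end]` ranges:

```ts
const LARGE_CHARACTER_RANGE_COUNT = 8;
type CharacterSet = Array<[number, number]>; // sorted, disjoint ranges

function collectLargeSets(stateSets: CharacterSet[]): CharacterSet[] {
  const large: CharacterSet[] = [];
  const sameSet = (a: CharacterSet, b: CharacterSet) =>
    a.length === b.length &&
    a.every(([lo, hi], i) => b[i][0] === lo && b[i][1] === hi);
  for (const set of stateSets) {
    if (
      set.length > LARGE_CHARACTER_RANGE_COUNT &&
      !large.some((existing) => sameSet(existing, set))
    ) {
      large.push(set);
    }
  }
  return large;
}
```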
- || token_conflict_map.does_overlap(i, existing_token.index) - { - return false; - } + if !coincident_token_index.contains(symbol, existing_token) + && (token_conflict_map.does_overlap(existing_token.index, i) + || token_conflict_map.does_overlap(i, existing_token.index)) + { + return false; } } } @@ -315,7 +366,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut new_state = LexState::default(); mem::swap(&mut new_state, &mut table.states[state_ids[0]]); - for (_, advance_action) in new_state.advance_actions.iter_mut() { + for (_, advance_action) in &mut new_state.advance_actions { advance_action.state = group_ids_by_state_id[advance_action.state]; } if let Some(eof_action) = &mut new_state.eof_action { @@ -324,18 +375,14 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { new_states.push(new_state); } - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { state.lex_state_id = group_ids_by_state_id[state.lex_state_id]; } table.states = new_states; } -fn lex_states_differ( - left: &LexState, - right: &LexState, - group_ids_by_state_id: &Vec, -) -> bool { +fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool { left.advance_actions .iter() .zip(right.advance_actions.iter()) @@ -362,7 +409,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { .map(|old_id| { let mut state = LexState::default(); mem::swap(&mut state, &mut table.states[*old_id]); - for (_, advance_action) in state.advance_actions.iter_mut() { + for (_, advance_action) in &mut state.advance_actions { advance_action.state = new_ids_by_old_id[advance_action.state]; } if let Some(eof_action) = &mut state.eof_action { @@ -373,7 +420,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { .collect(); // Update the parse table's lex state references - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { state.lex_state_id = new_ids_by_old_id[state.lex_state_id]; } } diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 1032026..2d22e21 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -1,31 +1,36 @@ -use super::item::{ParseItem, ParseItemSet, ParseItemSetCore}; -use super::item_set_builder::ParseItemSetBuilder; -use crate::generate::grammars::PrecedenceEntry; -use crate::generate::grammars::{ - InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, +use std::{ + cmp::Ordering, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, + fmt::Write, + hash::BuildHasherDefault, }; -use crate::generate::node_types::VariableInfo; -use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; -use crate::generate::tables::{ - FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, - ProductionInfo, ProductionInfoId, -}; -use anyhow::{anyhow, Result}; -use std::cmp::Ordering; -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; -use std::fmt::Write; -use std::hash::BuildHasherDefault; -use std::u32; +use anyhow::{anyhow, Result}; use indexmap::{map::Entry, IndexMap}; use rustc_hash::FxHasher; +use super::{ + item::{ParseItem, ParseItemSet, ParseItemSetCore}, + item_set_builder::ParseItemSetBuilder, +}; +use crate::generate::{ + grammars::{ + InlinedProductionMap, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, 
VariableType, + }, + node_types::VariableInfo, + rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}, + tables::{ + FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, + ParseTableEntry, ProductionInfo, ProductionInfoId, + }, +}; + // For conflict reporting, each parse state is associated with an example // sequence of symbols that could lead to that parse state. type SymbolSequence = Vec; type AuxiliarySymbolSequence = Vec; -pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); +pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -51,12 +56,13 @@ struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - variable_info: &'a Vec, + variable_info: &'a [VariableInfo], core_ids_by_core: HashMap, usize>, state_ids_by_item_set: IndexMap, ParseStateId, BuildHasherDefault>, parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, non_terminal_extra_states: Vec<(Symbol, usize)>, + actual_conflicts: HashSet>, parse_table: ParseTable, } @@ -74,14 +80,10 @@ impl<'a> ParseTableBuilder<'a> { self.add_parse_state( &Vec::new(), &Vec::new(), - ParseItemSet::with( - [( - ParseItem::start(), - [Symbol::end()].iter().cloned().collect(), - )] - .iter() - .cloned(), - ), + ParseItemSet::with(std::iter::once(( + ParseItem::start(), + std::iter::once(&Symbol::end()).copied().collect(), + ))), ); // Compute the possible item sets for non-terminal extras. @@ -96,7 +98,7 @@ impl<'a> ParseTableBuilder<'a> { for production in &variable.productions { non_terminal_extra_item_sets_by_first_terminal .entry(production.first_symbol().unwrap()) - .or_insert(ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert( ParseItem { variable_index: extra_non_terminal.index as u32, @@ -104,9 +106,8 @@ impl<'a> ParseTableBuilder<'a> { step_index: 1, has_preceding_inherited_fields: false, }, - &[Symbol::end_of_nonterminal_extra()] - .iter() - .cloned() + &std::iter::once(&Symbol::end_of_nonterminal_extra()) + .copied() .collect(), ); } @@ -128,10 +129,24 @@ impl<'a> ParseTableBuilder<'a> { self.parse_state_info_by_id[entry.state_id].0.clone(), entry.preceding_auxiliary_symbols, entry.state_id, - item_set, + &item_set, )?; } + if !self.actual_conflicts.is_empty() { + println!("Warning: unnecessary conflicts"); + for conflict in &self.actual_conflicts { + println!( + " {}", + conflict + .iter() + .map(|symbol| format!("`{}`", self.symbol_name(symbol))) + .collect::>() + .join(", ") + ); + } + } + Ok((self.parse_table, self.parse_state_info_by_id)) } @@ -180,7 +195,7 @@ impl<'a> ParseTableBuilder<'a> { mut preceding_symbols: SymbolSequence, mut preceding_auxiliary_symbols: Vec, state_id: ParseStateId, - item_set: ParseItemSet<'a>, + item_set: &ParseItemSet<'a>, ) -> Result<()> { let mut terminal_successors = BTreeMap::new(); let mut non_terminal_successors = BTreeMap::new(); @@ -203,7 +218,7 @@ impl<'a> ParseTableBuilder<'a> { // for conflict resolution. 
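The new `actual_conflicts` bookkeeping visible above works by subtraction: the set is seeded with every conflict the grammar declares, each conflict the builder actually uses is removed, and whatever remains triggers the "unnecessary conflicts" warning printed after table construction. The same logic in miniature, with joined strings standing in for `HashSet<Vec<Symbol>>`:

```ts
function warnUnnecessaryConflicts(declared: string[][], used: string[][]): void {
  const remaining = new Set(declared.map((c) => c.join(", ")));
  for (const conflict of used) {
    remaining.delete(conflict.join(", ")); // this one earned its keep
  }
  if (remaining.size > 0) {
    console.log("Warning: unnecessary conflicts");
    for (const leftover of remaining) console.log(`  ${leftover}`);
  }
}
```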
if variable.is_auxiliary() { preceding_auxiliary_symbols - .push(self.get_auxiliary_node_info(&item_set, next_symbol)); + .push(self.get_auxiliary_node_info(item_set, next_symbol)); } // For most parse items, the symbols associated with the preceding children @@ -223,12 +238,12 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_successors .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert(successor, lookaheads); } else { terminal_successors .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert(successor, lookaheads); } } @@ -253,7 +268,7 @@ impl<'a> ParseTableBuilder<'a> { let table_entry = self.parse_table.states[state_id] .terminal_entries .entry(lookahead) - .or_insert_with(|| ParseTableEntry::new()); + .or_insert_with(ParseTableEntry::new); let reduction_info = reduction_infos.entry(lookahead).or_default(); // While inserting Reduce actions, eagerly resolve conflicts related @@ -263,7 +278,7 @@ impl<'a> ParseTableBuilder<'a> { table_entry.actions.push(action); } else { match Self::compare_precedence( - &self.syntax_grammar, + self.syntax_grammar, precedence, &[symbol], &reduction_info.precedence, @@ -283,7 +298,7 @@ impl<'a> ParseTableBuilder<'a> { } } - reduction_info.precedence = precedence.clone(); + reduction_info.precedence.clone_from(precedence); if let Err(i) = reduction_info.symbols.binary_search(&symbol) { reduction_info.symbols.insert(i, symbol); } @@ -296,7 +311,7 @@ impl<'a> ParseTableBuilder<'a> { } } - // Having computed the the successor item sets for each symbol, add a new + // Having computed the successor item sets for each symbol, add a new // parse state for each of these item sets, and add a corresponding Shift // action to this state. for (symbol, next_item_set) in terminal_successors { @@ -318,7 +333,7 @@ impl<'a> ParseTableBuilder<'a> { } entry - .or_insert_with(|| ParseTableEntry::new()) + .or_insert_with(ParseTableEntry::new) .actions .push(ParseAction::Shift { state: next_state_id, @@ -346,7 +361,7 @@ impl<'a> ParseTableBuilder<'a> { // * fail, terminating the parser generation process for symbol in lookaheads_with_conflicts.iter() { self.handle_conflict( - &item_set, + item_set, state_id, &preceding_symbols, &preceding_auxiliary_symbols, @@ -429,7 +444,7 @@ impl<'a> ParseTableBuilder<'a> { item_set: &ParseItemSet, state_id: ParseStateId, preceding_symbols: &SymbolSequence, - preceding_auxiliary_symbols: &Vec, + preceding_auxiliary_symbols: &[AuxiliarySymbolInfo], conflicting_lookahead: Symbol, reduction_info: &ReductionInfo, ) -> Result<()> { @@ -445,33 +460,31 @@ impl<'a> ParseTableBuilder<'a> { // REDUCE-REDUCE conflicts where all actions have the *same* // precedence, and there can still be SHIFT/REDUCE conflicts. 
let mut considered_associativity = false; - let mut shift_precedence: Vec<(&Precedence, Symbol)> = Vec::new(); + let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new(); let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { if let Some(step) = item.step() { - if item.step_index > 0 { - if self + if item.step_index > 0 + && self .item_set_builder .first_set(&step.symbol) .contains(&conflicting_lookahead) - { - if item.variable_index != u32::MAX { - conflicting_items.insert(item); - } + { + if item.variable_index != u32::MAX { + conflicting_items.insert(item); + } - let p = ( - item.precedence(), - Symbol::non_terminal(item.variable_index as usize), - ); - if let Err(i) = shift_precedence.binary_search(&p) { - shift_precedence.insert(i, p); - } + let p = ( + item.precedence(), + Symbol::non_terminal(item.variable_index as usize), + ); + if let Err(i) = shift_precedence.binary_search(&p) { + shift_precedence.insert(i, p); } } - } else if lookaheads.contains(&conflicting_lookahead) { - if item.variable_index != u32::MAX { - conflicting_items.insert(item); - } + } else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX + { + conflicting_items.insert(item); } } @@ -497,7 +510,7 @@ impl<'a> ParseTableBuilder<'a> { let mut shift_is_more = false; for p in shift_precedence { match Self::compare_precedence( - &self.syntax_grammar, + self.syntax_grammar, p.0, &[p.1], &reduction_info.precedence, @@ -582,6 +595,7 @@ impl<'a> ParseTableBuilder<'a> { .expected_conflicts .contains(&actual_conflict) { + self.actual_conflicts.remove(&actual_conflict); return Ok(()); } @@ -590,13 +604,13 @@ impl<'a> ParseTableBuilder<'a> { write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap(); } - write!( + writeln!( &mut msg, - " • {} …\n\n", + " • {} …\n", self.symbol_name(&conflicting_lookahead) ) .unwrap(); - write!(&mut msg, "Possible interpretations:\n\n").unwrap(); + writeln!(&mut msg, "Possible interpretations:\n").unwrap(); let mut interpretations = conflicting_items .iter() @@ -639,11 +653,10 @@ impl<'a> ParseTableBuilder<'a> { let prec_line = if let Some(associativity) = associativity { Some(format!( - "(precedence: {}, associativity: {:?})", - precedence, associativity + "(precedence: {precedence}, associativity: {associativity:?})", )) } else if !precedence.is_none() { - Some(format!("(precedence: {})", precedence)) + Some(format!("(precedence: {precedence})")) } else { None }; @@ -672,7 +685,7 @@ impl<'a> ParseTableBuilder<'a> { } let mut resolution_count = 0; - write!(&mut msg, "\nPossible resolutions:\n\n").unwrap(); + writeln!(&mut msg, "\nPossible resolutions:\n").unwrap(); let mut shift_items = Vec::new(); let mut reduce_items = Vec::new(); for item in conflicting_items { @@ -707,24 +720,22 @@ impl<'a> ParseTableBuilder<'a> { }; if actual_conflict.len() > 1 { - if shift_items.len() > 0 { + if !shift_items.is_empty() { resolution_count += 1; write!( &mut msg, - " {}: Specify a higher precedence in", - resolution_count + " {resolution_count}: Specify a higher precedence in", ) .unwrap(); list_rule_names(&mut msg, &shift_items); - write!(&mut msg, " than in the other rules.\n").unwrap(); + writeln!(&mut msg, " than in the other rules.").unwrap(); } for item in &reduce_items { resolution_count += 1; - write!( + writeln!( &mut msg, - " {}: Specify a higher precedence in `{}` than in the other rules.\n", - resolution_count, + " {resolution_count}: Specify a higher precedence in `{}` than in the other rules.", 
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) ) .unwrap(); @@ -735,19 +746,17 @@ impl<'a> ParseTableBuilder<'a> { resolution_count += 1; write!( &mut msg, - " {}: Specify a left or right associativity in", - resolution_count + " {resolution_count}: Specify a left or right associativity in", ) .unwrap(); list_rule_names(&mut msg, &reduce_items); - write!(&mut msg, "\n").unwrap(); + writeln!(&mut msg).unwrap(); } resolution_count += 1; write!( &mut msg, - " {}: Add a conflict for these rules: ", - resolution_count + " {resolution_count}: Add a conflict for these rules: ", ) .unwrap(); for (i, symbol) in actual_conflict.iter().enumerate() { @@ -756,7 +765,7 @@ impl<'a> ParseTableBuilder<'a> { } write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap(); } - write!(&mut msg, "\n").unwrap(); + writeln!(&mut msg).unwrap(); Err(anyhow!(msg)) } @@ -789,7 +798,7 @@ impl<'a> ParseTableBuilder<'a> { // and to the default precedence, which is zero. (Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r), (Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0), - (Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r), + (Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(r), // Named precedences can be compared to other named precedences. _ => grammar @@ -856,7 +865,7 @@ impl<'a> ParseTableBuilder<'a> { production_info .field_map .entry(field_name.clone()) - .or_insert(Vec::new()) + .or_default() .push(FieldLocation { index: i, inherited: false, @@ -869,11 +878,11 @@ impl<'a> ParseTableBuilder<'a> { .is_visible() { let info = &self.variable_info[step.symbol.index]; - for (field_name, _) in &info.fields { + for field_name in info.fields.keys() { production_info .field_map .entry(field_name.clone()) - .or_insert(Vec::new()) + .or_default() .push(FieldLocation { index: i, inherited: true, @@ -887,7 +896,7 @@ impl<'a> ParseTableBuilder<'a> { } if item.production.steps.len() > self.parse_table.max_aliased_production_length { - self.parse_table.max_aliased_production_length = item.production.steps.len() + self.parse_table.max_aliased_production_length = item.production.steps.len(); } if let Some(index) = self @@ -923,7 +932,7 @@ impl<'a> ParseTableBuilder<'a> { } fn populate_following_tokens( - result: &mut Vec, + result: &mut [TokenSet], grammar: &SyntaxGrammar, inlines: &InlinedProductionMap, builder: &ParseItemSetBuilder, @@ -934,7 +943,6 @@ fn populate_following_tokens( .flat_map(|v| &v.productions) .chain(&inlines.productions); let all_tokens = (0..result.len()) - .into_iter() .map(Symbol::terminal) .collect::(); for production in productions { @@ -953,17 +961,18 @@ fn populate_following_tokens( for entry in result.iter_mut() { entry.insert(*extra); } - result[extra.index] = all_tokens.clone(); + result[extra.index].clone_from(&all_tokens); } } } -pub(crate) fn build_parse_table<'a>( +pub fn build_parse_table<'a>( syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, inlines: &'a InlinedProductionMap, - variable_info: &'a Vec, + variable_info: &'a [VariableInfo], ) -> Result<(ParseTable, Vec, Vec>)> { + let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect(); let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()]; populate_following_tokens( @@ -979,6 +988,7 @@ pub(crate) fn build_parse_table<'a>( item_set_builder, variable_info, non_terminal_extra_states: 
Vec::new(), + actual_conflicts, state_ids_by_item_set: IndexMap::default(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/cli/src/generate/build_tables/coincident_tokens.rs b/cli/src/generate/build_tables/coincident_tokens.rs index bb234c4..9341145 100644 --- a/cli/src/generate/build_tables/coincident_tokens.rs +++ b/cli/src/generate/build_tables/coincident_tokens.rs @@ -1,9 +1,12 @@ -use crate::generate::grammars::LexicalGrammar; -use crate::generate::rules::Symbol; -use crate::generate::tables::{ParseStateId, ParseTable}; use std::fmt; -pub(crate) struct CoincidentTokenIndex<'a> { +use crate::generate::{ + grammars::LexicalGrammar, + rules::Symbol, + tables::{ParseStateId, ParseTable}, +}; + +pub struct CoincidentTokenIndex<'a> { entries: Vec>, grammar: &'a LexicalGrammar, n: usize, @@ -23,7 +26,7 @@ impl<'a> CoincidentTokenIndex<'a> { for other_symbol in state.terminal_entries.keys() { if other_symbol.is_terminal() { let index = result.index(symbol.index, other_symbol.index); - if result.entries[index].last().cloned() != Some(i) { + if result.entries[index].last().copied() != Some(i) { result.entries[index].push(i); } } @@ -34,7 +37,7 @@ impl<'a> CoincidentTokenIndex<'a> { result } - pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec { + pub fn states_with(&self, a: Symbol, b: Symbol) -> &[ParseStateId] { &self.entries[self.index(a.index, b.index)] } @@ -42,7 +45,8 @@ impl<'a> CoincidentTokenIndex<'a> { !self.entries[self.index(a.index, b.index)].is_empty() } - fn index(&self, a: usize, b: usize) -> usize { + #[must_use] + const fn index(&self, a: usize, b: usize) -> usize { if a < b { a * self.n + b } else { @@ -53,20 +57,20 @@ impl<'a> CoincidentTokenIndex<'a> { impl<'a> fmt::Debug for CoincidentTokenIndex<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CoincidentTokenIndex {{\n")?; + writeln!(f, "CoincidentTokenIndex {{")?; - write!(f, " entries: {{\n")?; + writeln!(f, " entries: {{")?; for i in 0..self.n { - write!(f, " {}: {{\n", self.grammar.variables[i].name)?; + writeln!(f, " {}: {{", self.grammar.variables[i].name)?; for j in 0..self.n { - write!( + writeln!( f, - " {}: {:?},\n", + " {}: {:?},", self.grammar.variables[j].name, self.entries[self.index(i, j)].len() )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; write!(f, "}}")?; diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 32b1a8d..da19c4b 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -1,10 +1,15 @@ -use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}; -use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}; +use std::{ + cmp::Ordering, + fmt, + hash::{Hash, Hasher}, +}; + use lazy_static::lazy_static; -use std::cmp::Ordering; -use std::fmt; -use std::hash::{Hash, Hasher}; -use std::u32; + +use crate::generate::{ + grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar}, + rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet}, +}; lazy_static! { static ref START_PRODUCTION: Production = Production { @@ -22,9 +27,9 @@ lazy_static! { }; } -/// A ParseItem represents an in-progress match of a single production in a grammar. +/// A [`ParseItem`] represents an in-progress match of a single production in a grammar. 
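Worth pulling out of the CoincidentTokenIndex diff above: `index` packs the symmetric token-pair relation into a flat n×n vector by normalizing the pair order, so `(a, b)` and `(b, a)` always land in the same slot and only one of the two mirrored cells is ever populated:

```ts
const coincidentIndex = (a: number, b: number, n: number): number =>
  a < b ? a * n + b : b * n + a;

// coincidentIndex(2, 5, 8) === coincidentIndex(5, 2, 8) === 21
```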
#[derive(Clone, Copy, Debug)] -pub(crate) struct ParseItem<'a> { +pub struct ParseItem<'a> { /// The index of the parent rule within the grammar. pub variable_index: u32, /// The number of symbols that have already been matched. @@ -47,35 +52,35 @@ pub(crate) struct ParseItem<'a> { pub has_preceding_inherited_fields: bool, } -/// A ParseItemSet represents a set of in-progress matches of productions in a +/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a /// grammar, and for each in-progress match, a set of "lookaheads" - tokens that /// are allowed to *follow* the in-progress rule. This object corresponds directly /// to a state in the final parse table. -#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSet<'a> { +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct ParseItemSet<'a> { pub entries: Vec<(ParseItem<'a>, TokenSet)>, } -/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead +/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead /// information. Parse states with the same core are candidates for merging. #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSetCore<'a> { +pub struct ParseItemSetCore<'a> { pub entries: Vec>, } -pub(crate) struct ParseItemDisplay<'a>( +pub struct ParseItemDisplay<'a>( pub &'a ParseItem<'a>, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, ); -pub(crate) struct TokenSetDisplay<'a>( +pub struct TokenSetDisplay<'a>( pub &'a TokenSet, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, ); -pub(crate) struct ParseItemSetDisplay<'a>( +pub struct ParseItemSetDisplay<'a>( pub &'a ParseItemSet<'a>, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, @@ -116,16 +121,19 @@ impl<'a> ParseItem<'a> { } } + #[must_use] pub fn is_done(&self) -> bool { self.step_index as usize == self.production.steps.len() } - pub fn is_augmented(&self) -> bool { + #[must_use] + pub const fn is_augmented(&self) -> bool { self.variable_index == u32::MAX } /// Create an item like this one, but advanced by one step. - pub fn successor(&self) -> ParseItem<'a> { + #[must_use] + pub const fn successor(&self) -> Self { ParseItem { variable_index: self.variable_index, production: self.production, @@ -136,8 +144,8 @@ impl<'a> ParseItem<'a> { /// Create an item identical to this one, but with a different production. /// This is used when dynamically "inlining" certain symbols in a production. 
- pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { - let mut result = self.clone(); + pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { + let mut result = *self; result.production = production; result } @@ -172,14 +180,6 @@ impl<'a> ParseItemSet<'a> { } } -impl<'a> Default for ParseItemSet<'a> { - fn default() -> Self { - Self { - entries: Vec::new(), - } - } -} - impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { if self.0.is_augmented() { @@ -196,10 +196,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if i == self.0.step_index as usize { write!(f, " •")?; if let Some(associativity) = step.associativity { - if !step.precedence.is_none() { - write!(f, " ({} {:?})", step.precedence, associativity)?; + if step.precedence.is_none() { + write!(f, " ({associativity:?})")?; } else { - write!(f, " ({:?})", associativity)?; + write!(f, " ({} {associativity:?})", step.precedence)?; } } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; @@ -211,7 +211,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if let Some(variable) = self.2.variables.get(step.symbol.index) { write!(f, "{}", &variable.name)?; } else { - write!(f, "{}-{}", "terminal", step.symbol.index)?; + write!(f, "terminal-{}", step.symbol.index)?; } } else if step.symbol.is_external() { write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?; @@ -228,10 +228,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { write!(f, " •")?; if let Some(step) = self.0.production.steps.last() { if let Some(associativity) = step.associativity { - if !step.precedence.is_none() { - write!(f, " ({} {:?})", step.precedence, associativity)?; + if step.precedence.is_none() { + write!(f, " ({associativity:?})")?; } else { - write!(f, " ({:?})", associativity)?; + write!(f, " ({} {associativity:?})", step.precedence)?; } } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; @@ -255,7 +255,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> { if let Some(variable) = self.2.variables.get(symbol.index) { write!(f, "{}", &variable.name)?; } else { - write!(f, "{}-{}", "terminal", symbol.index)?; + write!(f, "terminal-{}", symbol.index)?; } } else if symbol.is_external() { write!(f, "{}", &self.1.external_tokens[symbol.index].name)?; @@ -270,7 +270,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> { impl<'a> fmt::Display for ParseItemSetDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - for (item, lookaheads) in self.0.entries.iter() { + for (item, lookaheads) in &self.0.entries { writeln!( f, "{}\t{}", @@ -288,7 +288,7 @@ impl<'a> Hash for ParseItem<'a> { hasher.write_u32(self.step_index); hasher.write_i32(self.production.dynamic_precedence); hasher.write_usize(self.production.steps.len()); - hasher.write_i32(self.has_preceding_inherited_fields as i32); + hasher.write_i32(i32::from(self.has_preceding_inherited_fields)); self.precedence().hash(hasher); self.associativity().hash(hasher); @@ -344,7 +344,7 @@ impl<'a> PartialEq for ParseItem<'a> { } } - return true; + true } } @@ -364,7 +364,7 @@ impl<'a> Ord for ParseItem<'a> { .len() .cmp(&other.production.steps.len()) }) - .then_with(|| self.precedence().cmp(&other.precedence())) + .then_with(|| self.precedence().cmp(other.precedence())) .then_with(|| self.associativity().cmp(&other.associativity())) .then_with(|| { for (i, step) in self.production.steps.iter().enumerate() { @@ -383,7 
+383,7 @@ impl<'a> Ord for ParseItem<'a> { return o; } } - return Ordering::Equal; + Ordering::Equal }) } } @@ -399,7 +399,7 @@ impl<'a> Eq for ParseItem<'a> {} impl<'a> Hash for ParseItemSet<'a> { fn hash(&self, hasher: &mut H) { hasher.write_usize(self.entries.len()); - for (item, lookaheads) in self.entries.iter() { + for (item, lookaheads) in &self.entries { item.hash(hasher); lookaheads.hash(hasher); } diff --git a/cli/src/generate/build_tables/item_set_builder.rs b/cli/src/generate/build_tables/item_set_builder.rs index 1828357..ff0323c 100644 --- a/cli/src/generate/build_tables/item_set_builder.rs +++ b/cli/src/generate/build_tables/item_set_builder.rs @@ -1,8 +1,13 @@ +use std::{ + collections::{HashMap, HashSet}, + fmt, +}; + use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay}; -use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Symbol, SymbolType, TokenSet}; -use std::collections::{HashMap, HashSet}; -use std::fmt; +use crate::generate::{ + grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}, + rules::{Symbol, SymbolType, TokenSet}, +}; #[derive(Clone, Debug, PartialEq, Eq)] struct TransitiveClosureAddition<'a> { @@ -16,7 +21,7 @@ struct FollowSetInfo { propagates_lookaheads: bool, } -pub(crate) struct ParseItemSetBuilder<'a> { +pub struct ParseItemSetBuilder<'a> { syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, first_sets: HashMap, @@ -69,9 +74,9 @@ impl<'a> ParseItemSetBuilder<'a> { } // The FIRST set of a non-terminal `i` is the union of the following sets: - // * the set of all terminals that appear at the beginings of i's productions - // * the FIRST sets of all the non-terminals that appear at the beginnings - // of i's productions + // * the set of all terminals that appear at the beginnings of i's productions + // * the FIRST sets of all the non-terminals that appear at the beginnings of i's + // productions // // Rather than computing these sets using recursion, we use an explicit stack // called `symbols_to_process`. @@ -80,7 +85,10 @@ impl<'a> ParseItemSetBuilder<'a> { for i in 0..syntax_grammar.variables.len() { let symbol = Symbol::non_terminal(i); - let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new()); + let first_set = result + .first_sets + .entry(symbol) + .or_insert_with(TokenSet::new); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -88,10 +96,7 @@ impl<'a> ParseItemSetBuilder<'a> { if current_symbol.is_terminal() || current_symbol.is_external() { first_set.insert(current_symbol); } else if processed_non_terminals.insert(current_symbol) { - for production in syntax_grammar.variables[current_symbol.index] - .productions - .iter() - { + for production in &syntax_grammar.variables[current_symbol.index].productions { if let Some(step) = production.steps.first() { symbols_to_process.push(step.symbol); } @@ -100,7 +105,7 @@ impl<'a> ParseItemSetBuilder<'a> { } // The LAST set is defined in a similar way to the FIRST set. 
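Since the comment block above is the heart of the item-set builder, here is the FIRST-set computation it describes as a compact sketch (the `firstSymbols` accessor is a hypothetical stand-in for walking the grammar's productions, and external tokens are folded into "terminal" here): terminals are unioned in directly, and each non-terminal is expanded at most once via an explicit work stack instead of recursion.

```ts
type GrammarSym = { isTerminal: boolean; index: number };

function firstSet(
  firstSymbols: (nonTerminal: number) => GrammarSym[], // first symbol of each production
  start: number,
): Set<number> {
  const first = new Set<number>();     // terminal indices in FIRST(start)
  const processed = new Set<number>(); // non-terminals already expanded
  const stack: GrammarSym[] = [{ isTerminal: false, index: start }];
  while (stack.length > 0) {
    const sym = stack.pop()!;
    if (sym.isTerminal) {
      first.add(sym.index);
    } else if (!processed.has(sym.index)) {
      processed.add(sym.index);
      for (const s of firstSymbols(sym.index)) stack.push(s);
    }
  }
  return first;
}
```

The LAST sets computed next are built identically, just seeded from the last symbol of each production instead of the first.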
- let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new()); + let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -108,10 +113,7 @@ impl<'a> ParseItemSetBuilder<'a> { if current_symbol.is_terminal() || current_symbol.is_external() { last_set.insert(current_symbol); } else if processed_non_terminals.insert(current_symbol) { - for production in syntax_grammar.variables[current_symbol.index] - .productions - .iter() - { + for production in &syntax_grammar.variables[current_symbol.index].productions { if let Some(step) = production.steps.last() { symbols_to_process.push(step.symbol); } @@ -133,11 +135,11 @@ impl<'a> ParseItemSetBuilder<'a> { // item set when `i` occurs as the next symbol in one if its core items. The // structure of an *addition* is as follows: // * `item` - the new item that must be added as part of the expansion of `i` - // * `lookaheads` - lookahead tokens that can always come after that item in - // the expansion of `i` - // * `propagates_lookaheads` - a boolean indicating whether or not `item` can - // occur at the *end* of the expansion of `i`, so that i's own current - // lookahead tokens can occur after `item`. + // * `lookaheads` - lookahead tokens that can always come after that item in the expansion + // of `i` + // * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the + // *end* of the expansion of `i`, so that i's own current lookahead tokens can occur + // after `item`. // // Again, rather than computing these additions recursively, we use an explicit // stack called `entries_to_process`. @@ -235,7 +237,7 @@ impl<'a> ParseItemSetBuilder<'a> { result } - pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { + pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { if let Some(productions) = self @@ -270,11 +272,9 @@ impl<'a> ParseItemSetBuilder<'a> { let next_step = item.successor().step(); // Determine which tokens can follow this non-terminal. - let following_tokens = if let Some(next_step) = next_step { + let following_tokens = next_step.map_or(lookaheads, |next_step| { self.first_sets.get(&next_step.symbol).unwrap() - } else { - &lookaheads - }; + }); // Use the pre-computed *additions* to expand the non-terminal. 
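The precomputed *additions* described above turn the transitive closure into a lookup plus a lookahead merge: every addition carries its fixed lookaheads, and when `propagates_lookaheads` is set, the triggering item's own lookaheads are merged in as well. Sketched with illustrative types:

```ts
type Addition = {
  item: number;                  // item contributed by expanding the non-terminal
  lookaheads: Set<number>;       // tokens that can always follow that item
  propagatesLookaheads: boolean; // can the item end the expansion?
};

function applyAdditions(
  additions: Addition[],
  triggeringLookaheads: Set<number>,
  insert: (item: number, lookaheads: Set<number>) => void,
): void {
  for (const addition of additions) {
    const lookaheads = new Set(addition.lookaheads);
    if (addition.propagatesLookaheads) {
      // The triggering item's own lookaheads can follow this item too.
      for (const t of triggeringLookaheads) lookaheads.add(t);
    }
    insert(addition.item, lookaheads);
  }
}
```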
for addition in &self.transitive_closure_additions[step.symbol.index] { @@ -291,9 +291,9 @@ impl<'a> ParseItemSetBuilder<'a> { impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ParseItemSetBuilder {{\n")?; + writeln!(f, "ParseItemSetBuilder {{")?; - write!(f, " first_sets: {{\n")?; + writeln!(f, " first_sets: {{")?; for (symbol, first_set) in &self.first_sets { let name = match symbol.kind { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, @@ -301,16 +301,15 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; - write!( + writeln!( f, - " first({:?}): {}\n", - name, - TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar) + " first({name:?}): {}", + TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }}\n")?; + writeln!(f, " }}")?; - write!(f, " last_sets: {{\n")?; + writeln!(f, " last_sets: {{")?; for (symbol, last_set) in &self.last_sets { let name = match symbol.kind { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, @@ -318,26 +317,25 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; - write!( + writeln!( f, - " last({:?}): {}\n", - name, - TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar) + " last({name:?}): {}", + TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }}\n")?; + writeln!(f, " }}")?; - write!(f, " additions: {{\n")?; + writeln!(f, " additions: {{")?; for (i, variable) in self.syntax_grammar.variables.iter().enumerate() { - write!(f, " {}: {{\n", variable.name)?; + writeln!(f, " {}: {{", variable.name)?; for addition in &self.transitive_closure_additions[i] { - write!( + writeln!( f, - " {}\n", + " {}", ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index d10bea5..5eb6260 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -1,15 +1,19 @@ -use super::token_conflicts::TokenConflictMap; -use crate::generate::dedup::split_state_id_groups; -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; -use crate::generate::rules::{AliasMap, Symbol, TokenSet}; -use crate::generate::tables::{ - GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +use std::{ + collections::{HashMap, HashSet}, + mem, }; + use log::info; -use std::collections::{HashMap, HashSet}; -use std::mem; -pub(crate) fn minimize_parse_table( +use super::token_conflicts::TokenConflictMap; +use crate::generate::{ + dedup::split_state_id_groups, + grammars::{LexicalGrammar, SyntaxGrammar, VariableType}, + rules::{AliasMap, Symbol, TokenSet}, + tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry}, +}; + +pub fn minimize_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -67,9 +71,9 @@ impl<'a> Minimizer<'a> { symbol, .. 
} => { - if !self.simple_aliases.contains_key(&symbol) - && !self.syntax_grammar.supertype_symbols.contains(&symbol) - && !aliased_symbols.contains(&symbol) + if !self.simple_aliases.contains_key(symbol) + && !self.syntax_grammar.supertype_symbols.contains(symbol) + && !aliased_symbols.contains(symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named && (unit_reduction_symbol.is_none() @@ -97,21 +101,22 @@ impl<'a> Minimizer<'a> { } } - for state in self.parse_table.states.iter_mut() { + for state in &mut self.parse_table.states { let mut done = false; while !done { done = true; state.update_referenced_states(|other_state_id, state| { - if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { - done = false; - match state.nonterminal_entries.get(symbol) { - Some(GotoAction::Goto(state_id)) => *state_id, - _ => other_state_id, - } - } else { - other_state_id - } - }) + unit_reduction_symbols_by_state.get(&other_state_id).map_or( + other_state_id, + |symbol| { + done = false; + match state.nonterminal_entries.get(symbol) { + Some(GotoAction::Goto(state_id)) => *state_id, + _ => other_state_id, + } + }, + ) + }); } } } @@ -198,7 +203,7 @@ impl<'a> Minimizer<'a> { &self, left_state: &ParseState, right_state: &ParseState, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { @@ -223,15 +228,15 @@ impl<'a> Minimizer<'a> { } for token in right_state.terminal_entries.keys() { - if !left_state.terminal_entries.contains_key(token) { - if self.token_conflicts( + if !left_state.terminal_entries.contains_key(token) + && self.token_conflicts( left_state.id, right_state.id, left_state.terminal_entries.keys(), *token, - ) { - return true; - } + ) + { + return true; } } @@ -242,7 +247,7 @@ impl<'a> Minimizer<'a> { &self, state1: &ParseState, state2: &ParseState, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { for (token, entry1) in &state1.terminal_entries { if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() { @@ -252,12 +257,10 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 != group2 { info!( - "split states {} {} - successors for {} are split: {} {}", + "split states {} {} - successors for {} are split: {s1} {s2}", state1.id, state2.id, self.symbol_name(token), - s1, - s2, ); return true; } @@ -275,12 +278,10 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 != group2 { info!( - "split states {} {} - successors for {} are split: {} {}", + "split states {} {} - successors for {} are split: {s1} {s2}", state1.id, state2.id, self.symbol_name(symbol), - s1, - s2, ); return true; } @@ -300,16 +301,14 @@ impl<'a> Minimizer<'a> { token: &Symbol, entry1: &ParseTableEntry, entry2: &ParseTableEntry, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { // To be compatible, entries need to have the same actions. 
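Both the lex-table and parse-table minimizers in this patch lean on the same fixed-point refinement, `split_state_id_groups` from `generate::dedup`: states start out optimistically grouped, and a group splits whenever two members are observed to differ. A simplified sketch of that loop, generic over the `differ` predicate that the surrounding code supplies (successor checks, action comparisons, token conflicts):

```ts
function splitGroups<S>(
  states: S[],
  groupIds: number[], // state index -> group id; refined in place
  differ: (a: S, b: S, ids: number[]) => boolean,
): number[] {
  for (let changed = true; changed; ) {
    changed = false;
    const representative = new Map<number, number>(); // group -> first member
    const splitInto = new Map<number, number>();      // group -> fresh group
    for (let i = 0; i < states.length; i++) {
      const group = groupIds[i];
      const rep = representative.get(group);
      if (rep === undefined) {
        representative.set(group, i);
      } else if (differ(states[i], states[rep], groupIds)) {
        let fresh = splitInto.get(group);
        if (fresh === undefined) {
          fresh = Math.max(...groupIds) + 1;
          splitInto.set(group, fresh);
        }
        groupIds[i] = fresh; // revisited on the next pass
        changed = true;
      }
    }
  }
  return groupIds;
}
```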
let actions1 = &entry1.actions; let actions2 = &entry2.actions; if actions1.len() != actions2.len() { info!( - "split states {} {} - differing action counts for token {}", - state_id1, - state_id2, + "split states {state_id1} {state_id2} - differing action counts for token {}", self.symbol_name(token) ); return true; @@ -334,22 +333,15 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 == group2 && is_repetition1 == is_repetition2 { continue; - } else { - info!( - "split states {} {} - successors for {} are split: {} {}", - state_id1, - state_id2, - self.symbol_name(token), - s1, - s2, - ); - return true; } + info!( + "split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}", + self.symbol_name(token), + ); + return true; } else if action1 != action2 { info!( - "split states {} {} - unequal actions for {}", - state_id1, - state_id2, + "split states {state_id1} {state_id2} - unequal actions for {}", self.symbol_name(token), ); return true; @@ -367,10 +359,7 @@ impl<'a> Minimizer<'a> { new_token: Symbol, ) -> bool { if new_token == Symbol::end_of_nonterminal_extra() { - info!( - "split states {} {} - end of non-terminal extra", - left_id, right_id, - ); + info!("split states {left_id} {right_id} - end of non-terminal extra",); return true; } @@ -378,9 +367,7 @@ impl<'a> Minimizer<'a> { // existing lookahead tokens. if new_token.is_external() { info!( - "split states {} {} - external token {}", - left_id, - right_id, + "split states {left_id} {right_id} - external token {}", self.symbol_name(&new_token), ); return true; @@ -395,9 +382,7 @@ impl<'a> Minimizer<'a> { .any(|external| external.corresponding_internal_token == Some(new_token)) { info!( - "split states {} {} - internal/external token {}", - left_id, - right_id, + "split states {left_id} {right_id} - internal/external token {}", self.symbol_name(&new_token), ); return true; @@ -405,27 +390,24 @@ impl<'a> Minimizer<'a> { // Do not add a token if it conflicts with an existing token. 
for token in existing_tokens { - if token.is_terminal() { - if !(self.syntax_grammar.word_token == Some(*token) + if token.is_terminal() + && !(self.syntax_grammar.word_token == Some(*token) && self.keywords.contains(&new_token)) - && !(self.syntax_grammar.word_token == Some(new_token) - && self.keywords.contains(token)) - && (self + && !(self.syntax_grammar.word_token == Some(new_token) + && self.keywords.contains(token)) + && (self + .token_conflict_map + .does_conflict(new_token.index, token.index) + || self .token_conflict_map - .does_conflict(new_token.index, token.index) - || self - .token_conflict_map - .does_match_same_string(new_token.index, token.index)) - { - info!( - "split states {} {} - token {} conflicts with {}", - left_id, - right_id, - self.symbol_name(&new_token), - self.symbol_name(token), - ); - return true; - } + .does_match_same_string(new_token.index, token.index)) + { + info!( + "split states {left_id} {right_id} - token {} conflicts with {}", + self.symbol_name(&new_token), + self.symbol_name(token), + ); + return true; } } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index fe99625..34fa3fa 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -1,33 +1,48 @@ -pub(crate) mod build_lex_table; -pub(crate) mod build_parse_table; +mod build_lex_table; +mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; mod minimize_parse_table; mod token_conflicts; -use self::build_lex_table::build_lex_table; -use self::build_parse_table::{build_parse_table, ParseStateInfo}; -use self::coincident_tokens::CoincidentTokenIndex; -use self::minimize_parse_table::minimize_parse_table; -use self::token_conflicts::TokenConflictMap; -use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::NfaCursor; -use crate::generate::node_types::VariableInfo; -use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet}; -use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; +use std::collections::{BTreeSet, HashMap}; + use anyhow::Result; +pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT; use log::info; -use std::collections::{BTreeSet, HashMap}; -pub(crate) fn build_tables( +use self::{ + build_lex_table::build_lex_table, + build_parse_table::{build_parse_table, ParseStateInfo}, + coincident_tokens::CoincidentTokenIndex, + minimize_parse_table::minimize_parse_table, + token_conflicts::TokenConflictMap, +}; +use crate::generate::{ + grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}, + nfa::{CharacterSet, NfaCursor}, + node_types::VariableInfo, + rules::{AliasMap, Symbol, SymbolType, TokenSet}, + tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}, +}; + +pub struct Tables { + pub parse_table: ParseTable, + pub main_lex_table: LexTable, + pub keyword_lex_table: LexTable, + pub word_token: Option, + pub large_character_sets: Vec<(Option, CharacterSet)>, +} + +pub fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, - variable_info: &Vec, + variable_info: &[VariableInfo], inlines: &InlinedProductionMap, report_symbol_name: Option<&str>, -) -> Result<(ParseTable, LexTable, LexTable, Option)> { +) -> Result { let (mut parse_table, following_tokens, parse_state_info) = build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); 
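One API note on the mod.rs changes above: `build_tables` now returns the named `Tables` struct rather than the old four-element tuple, so call sites read by field name instead of position, and the new `large_character_sets` ride along without widening a tuple. The TypeScript analog of that refactor, with opaque placeholder types:

```ts
type ParseTable = unknown;
type LexTable = unknown;
type Sym = unknown;
type CharacterSet = unknown;

// Before: a positional [ParseTable, LexTable, LexTable, Sym | null] tuple.
interface Tables {
  parseTable: ParseTable;
  mainLexTable: LexTable;
  keywordLexTable: LexTable;
  wordToken: Sym | null;
  largeCharacterSets: Array<[Sym | null, CharacterSet]>;
}
```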
@@ -56,7 +71,7 @@ pub(crate) fn build_tables( &token_conflict_map, &keywords, ); - let (main_lex_table, keyword_lex_table) = build_lex_table( + let lex_tables = build_lex_table( &mut parse_table, syntax_grammar, lexical_grammar, @@ -69,19 +84,21 @@ pub(crate) fn build_tables( if let Some(report_symbol_name) = report_symbol_name { report_state_info( - &syntax_grammar, - &lexical_grammar, + syntax_grammar, + lexical_grammar, &parse_table, &parse_state_info, report_symbol_name, ); } - Ok(( + + Ok(Tables { parse_table, - main_lex_table, - keyword_lex_table, - syntax_grammar.word_token, - )) + main_lex_table: lex_tables.main_lex_table, + keyword_lex_table: lex_tables.keyword_lex_table, + large_character_sets: lex_tables.large_character_sets, + word_token: syntax_grammar.word_token, + }) } fn populate_error_state( @@ -97,10 +114,9 @@ fn populate_error_state( // First identify the *conflict-free tokens*: tokens that do not overlap with // any other token in any way, besides matching exactly the same string. - let conflict_free_tokens: TokenSet = (0..n) - .into_iter() + let conflict_free_tokens = (0..n) .filter_map(|i| { - let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { + let conflicts_with_other_tokens = (0..n).any(|j| { j != i && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) && token_conflict_map.does_match_shorter_or_longer(i, j) @@ -115,7 +131,7 @@ fn populate_error_state( Some(Symbol::terminal(i)) } }) - .collect(); + .collect::(); let recover_entry = ParseTableEntry { reusable: false, @@ -126,18 +142,19 @@ fn populate_error_state( // the *conflict-free tokens* identified above. for i in 0..n { let symbol = Symbol::terminal(i); - if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) { - if syntax_grammar.word_token != Some(symbol) { - if let Some(t) = conflict_free_tokens.iter().find(|t| { - !coincident_token_index.contains(symbol, *t) - && token_conflict_map.does_conflict(symbol.index, t.index) - }) { - info!( - "error recovery - exclude token {} because of conflict with {}", - lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name - ); - continue; - } + if !conflict_free_tokens.contains(&symbol) + && !keywords.contains(&symbol) + && syntax_grammar.word_token != Some(symbol) + { + if let Some(t) = conflict_free_tokens.iter().find(|t| { + !coincident_token_index.contains(symbol, *t) + && token_conflict_map.does_conflict(symbol.index, t.index) + }) { + info!( + "error recovery - exclude token {} because of conflict with {}", + lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name + ); + continue; } } info!( @@ -263,7 +280,7 @@ fn identify_keywords( // First find all of the candidate keyword tokens: tokens that start with // letters or underscore and can match the same string as a word token. - let keyword_candidates: TokenSet = lexical_grammar + let keyword_candidates = lexical_grammar .variables .iter() .enumerate() @@ -282,10 +299,10 @@ fn identify_keywords( None } }) - .collect(); + .collect::(); // Exclude keyword candidates that shadow another keyword candidate. - let keywords: TokenSet = keyword_candidates + let keywords = keyword_candidates .iter() .filter(|token| { for other_token in keyword_candidates.iter() { @@ -302,7 +319,7 @@ fn identify_keywords( } true }) - .collect(); + .collect::(); // Exclude keyword candidates for which substituting the keyword capture // token would introduce new lexical conflicts with other tokens. 
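The error-recovery setup above begins by computing the *conflict-free tokens*: token `i` survives only if no other token `j` overlaps it in any way besides matching exactly the same string. Restated as a sketch, with the two index structures reduced to predicates:

```ts
function conflictFreeTokens(
  n: number, // number of terminal tokens
  coincident: (i: number, j: number) => boolean,             // CoincidentTokenIndex
  matchesShorterOrLonger: (i: number, j: number) => boolean, // TokenConflictMap
): Set<number> {
  const result = new Set<number>();
  for (let i = 0; i < n; i++) {
    let conflicts = false;
    for (let j = 0; j < n && !conflicts; j++) {
      conflicts = j !== i && !coincident(i, j) && matchesShorterOrLonger(i, j);
    }
    if (!conflicts) result.add(i);
  }
  return result;
}
```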
@@ -361,7 +378,7 @@ fn mark_fragile_tokens( ) { let n = lexical_grammar.variables.len(); let mut valid_tokens_mask = Vec::with_capacity(n); - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { valid_tokens_mask.clear(); valid_tokens_mask.resize(n, false); for token in state.terminal_entries.keys() { @@ -369,14 +386,12 @@ fn mark_fragile_tokens( valid_tokens_mask[token.index] = true; } } - for (token, entry) in state.terminal_entries.iter_mut() { + for (token, entry) in &mut state.terminal_entries { if token.is_terminal() { for (i, is_valid) in valid_tokens_mask.iter().enumerate() { - if *is_valid { - if token_conflict_map.does_overlap(i, token.index) { - entry.reusable = false; - break; - } + if *is_valid && token_conflict_map.does_overlap(i, token.index) { + entry.reusable = false; + break; } } } @@ -388,7 +403,7 @@ fn report_state_info<'a>( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, parse_table: &ParseTable, - parse_state_info: &Vec>, + parse_state_info: &[ParseStateInfo<'a>], report_symbol_name: &'a str, ) { let mut all_state_indices = BTreeSet::new(); @@ -399,7 +414,7 @@ fn report_state_info<'a>( for (i, state) in parse_table.states.iter().enumerate() { all_state_indices.insert(i); let item_set = &parse_state_info[state.id]; - for (item, _) in item_set.1.entries.iter() { + for (item, _) in &item_set.1.entries { if !item.is_augmented() { symbols_with_state_indices[item.variable_index as usize] .1 @@ -424,7 +439,7 @@ fn report_state_info<'a>( width = max_symbol_name_length ); } - eprintln!(""); + eprintln!(); let state_indices = if report_symbol_name == "*" { Some(&all_state_indices) @@ -441,14 +456,14 @@ fn report_state_info<'a>( }; if let Some(state_indices) = state_indices { - let mut state_indices = state_indices.into_iter().cloned().collect::>(); + let mut state_indices = state_indices.iter().copied().collect::>(); state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i)); for state_index in state_indices { let id = parse_table.states[state_index].id; let (preceding_symbols, item_set) = &parse_state_info[id]; - eprintln!("state index: {}", state_index); - eprintln!("state id: {}", id); + eprintln!("state index: {state_index}"); + eprintln!("state id: {id}"); eprint!("symbol sequence:"); for symbol in preceding_symbols { let name = if symbol.is_terminal() { @@ -458,11 +473,11 @@ fn report_state_info<'a>( } else { &syntax_grammar.variables[symbol.index].name }; - eprint!(" {}", name); + eprint!(" {name}"); } eprintln!( "\nitems:\n{}", - self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,), + self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,), ); } } diff --git a/cli/src/generate/build_tables/token_conflicts.rs b/cli/src/generate/build_tables/token_conflicts.rs index 223d348..47a114d 100644 --- a/cli/src/generate/build_tables/token_conflicts.rs +++ b/cli/src/generate/build_tables/token_conflicts.rs @@ -1,10 +1,11 @@ -use crate::generate::build_tables::item::TokenSetDisplay; -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition}; -use crate::generate::rules::TokenSet; -use std::cmp::Ordering; -use std::collections::HashSet; -use std::fmt; +use std::{cmp::Ordering, collections::HashSet, fmt}; + +use crate::generate::{ + build_tables::item::TokenSetDisplay, + grammars::{LexicalGrammar, SyntaxGrammar}, + nfa::{CharacterSet, NfaCursor, NfaTransition}, + rules::TokenSet, +}; #[derive(Clone, 
Debug, Default, PartialEq, Eq)] struct TokenConflictStatus { @@ -16,7 +17,7 @@ struct TokenConflictStatus { matches_different_string: bool, } -pub(crate) struct TokenConflictMap<'a> { +pub struct TokenConflictMap<'a> { n: usize, status_matrix: Vec, following_tokens: Vec, @@ -104,19 +105,17 @@ impl<'a> TokenConflictMap<'a> { } pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool { - if left.0 > right.0 { - return true; - } else if left.0 < right.0 { - return false; - } - - match grammar.variables[left.1] - .implicit_precedence - .cmp(&grammar.variables[right.1].implicit_precedence) - { + match left.0.cmp(&right.0) { Ordering::Less => false, Ordering::Greater => true, - Ordering::Equal => left.1 < right.1, + Ordering::Equal => match grammar.variables[left.1] + .implicit_precedence + .cmp(&grammar.variables[right.1].implicit_precedence) + { + Ordering::Less => false, + Ordering::Greater => true, + Ordering::Equal => left.1 < right.1, + }, } } @@ -135,10 +134,9 @@ impl<'a> TokenConflictMap<'a> { return false; } if has_separator_transitions - && grammar + && !grammar .variable_indices_for_nfa_states(&t.states) - .position(|i| i == completed_id) - .is_none() + .any(|i| i == completed_id) { return false; } @@ -149,53 +147,53 @@ impl<'a> TokenConflictMap<'a> { impl<'a> fmt::Debug for TokenConflictMap<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "TokenConflictMap {{\n")?; + writeln!(f, "TokenConflictMap {{")?; let syntax_grammar = SyntaxGrammar::default(); - write!(f, " following_tokens: {{\n")?; + writeln!(f, " following_tokens: {{")?; for (i, following_tokens) in self.following_tokens.iter().enumerate() { - write!( + writeln!( f, - " follow({:?}): {},\n", + " follow({:?}): {},", self.grammar.variables[i].name, - TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar) + TokenSetDisplay(following_tokens, &syntax_grammar, self.grammar) )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " starting_characters: {{\n")?; + writeln!(f, " starting_characters: {{")?; for i in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[i].name, self.starting_chars_by_index[i] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " following_characters: {{\n")?; + writeln!(f, " following_characters: {{")?; for i in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[i].name, self.following_chars_by_index[i] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " status_matrix: {{\n")?; + writeln!(f, " status_matrix: {{")?; for i in 0..self.n { - write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?; + writeln!(f, " {:?}: {{", self.grammar.variables[i].name)?; for j in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[j].name, self.status_matrix[matrix_index(self.n, i, j)] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; write!(f, "}}")?; @@ -203,7 +201,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { } } -fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize { +const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize { variable_count * i + j } @@ -221,8 +219,8 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec, - following_tokens: &Vec, + starting_chars: &[CharacterSet], + following_tokens: &[TokenSet], ) -> Vec { following_tokens .iter() @@ -241,7 +239,7 @@ fn 
get_following_chars( fn compute_conflict_status( cursor: &mut NfaCursor, grammar: &LexicalGrammar, - following_chars: &Vec, + following_chars: &[CharacterSet], i: usize, j: usize, ) -> (TokenConflictStatus, TokenConflictStatus) { @@ -330,9 +328,8 @@ fn compute_conflict_status( if variable_id == completed_id { successor_contains_completed_id = true; break; - } else { - advanced_id = Some(variable_id); } + advanced_id = Some(variable_id); } // Determine which action is preferred: matching the already complete @@ -357,12 +354,10 @@ fn compute_conflict_status( result.1.does_match_valid_continuation = true; } } + } else if completed_id == i { + result.0.matches_prefix = true; } else { - if completed_id == i { - result.0.matches_prefix = true; - } else { - result.1.matches_prefix = true; - } + result.1.matches_prefix = true; } } } @@ -378,9 +373,11 @@ fn compute_conflict_status( #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{Variable, VariableType}; - use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}; - use crate::generate::rules::{Precedence, Rule, Symbol}; + use crate::generate::{ + grammars::{Variable, VariableType}, + prepare_grammar::{expand_tokens, ExtractedLexicalGrammar}, + rules::{Precedence, Rule, Symbol}, + }; #[test] fn test_starting_characters() { @@ -390,12 +387,12 @@ mod tests { Variable { name: "token_0".to_string(), kind: VariableType::Named, - rule: Rule::pattern("[a-f]1|0x\\d"), + rule: Rule::pattern("[a-f]1|0x\\d", ""), }, Variable { name: "token_1".to_string(), kind: VariableType::Named, - rule: Rule::pattern("d*ef"), + rule: Rule::pattern("d*ef", ""), }, ], }) @@ -426,7 +423,7 @@ mod tests { Variable { name: "identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("\\w+"), + rule: Rule::pattern("\\w+", ""), }, Variable { name: "instanceof".to_string(), @@ -442,14 +439,14 @@ mod tests { let token_map = TokenConflictMap::new( &grammar, vec![ - [Symbol::terminal(var("identifier"))] - .iter() - .cloned() + std::iter::once(&Symbol::terminal(var("identifier"))) + .copied() + .collect(), + std::iter::once(&Symbol::terminal(var("in"))) + .copied() .collect(), - [Symbol::terminal(var("in"))].iter().cloned().collect(), - [Symbol::terminal(var("identifier"))] - .iter() - .cloned() + std::iter::once(&Symbol::terminal(var("identifier"))) + .copied() .collect(), ], ); @@ -471,7 +468,7 @@ mod tests { #[test] fn test_token_conflicts_with_separators() { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -498,7 +495,7 @@ mod tests { #[test] fn test_token_conflicts_with_open_ended_tokens() { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -508,7 +505,7 @@ mod tests { Variable { name: "anything".to_string(), kind: VariableType::Named, - rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")), + rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")), }, ], }) diff --git a/cli/src/generate/char_tree.rs b/cli/src/generate/char_tree.rs deleted file mode 100644 index 2de5e83..0000000 --- a/cli/src/generate/char_tree.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::ops::Range; - -/// A set of characters represented as a balanced binary tree of comparisons. 
-/// This is used as an intermediate step in generating efficient code for -/// matching a given character set. -#[derive(PartialEq, Eq)] -pub enum CharacterTree { - Yes, - Compare { - value: char, - operator: Comparator, - consequence: Option>, - alternative: Option>, - }, -} - -#[derive(PartialEq, Eq)] -pub enum Comparator { - Less, - LessOrEqual, - Equal, - GreaterOrEqual, -} - -impl CharacterTree { - pub fn from_ranges(ranges: &[Range]) -> Option { - match ranges.len() { - 0 => None, - 1 => { - let range = &ranges[0]; - if range.start == range.end { - Some(CharacterTree::Compare { - operator: Comparator::Equal, - value: range.start, - consequence: Some(Box::new(CharacterTree::Yes)), - alternative: None, - }) - } else { - Some(CharacterTree::Compare { - operator: Comparator::GreaterOrEqual, - value: range.start, - consequence: Some(Box::new(CharacterTree::Compare { - operator: Comparator::LessOrEqual, - value: range.end, - consequence: Some(Box::new(CharacterTree::Yes)), - alternative: None, - })), - alternative: None, - }) - } - } - len => { - let mid = len / 2; - let mid_range = &ranges[mid]; - Some(CharacterTree::Compare { - operator: Comparator::Less, - value: mid_range.start, - consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new), - alternative: Some(Box::new(CharacterTree::Compare { - operator: Comparator::LessOrEqual, - value: mid_range.end, - consequence: Some(Box::new(CharacterTree::Yes)), - alternative: Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new), - })), - }) - } - } - } - - #[cfg(test)] - fn contains(&self, c: char) -> bool { - match self { - CharacterTree::Yes => true, - CharacterTree::Compare { - value, - operator, - alternative, - consequence, - } => { - let condition = match operator { - Comparator::Less => c < *value, - Comparator::LessOrEqual => c <= *value, - Comparator::Equal => c == *value, - Comparator::GreaterOrEqual => c >= *value, - }; - if condition { consequence } else { alternative } - .as_ref() - .map_or(false, |a| a.contains(c)) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_character_tree_simple() { - let tree = CharacterTree::from_ranges(&['a'..'d', 'h'..'l', 'p'..'r', 'u'..'u', 'z'..'z']) - .unwrap(); - - assert!(tree.contains('a')); - assert!(tree.contains('b')); - assert!(tree.contains('c')); - assert!(tree.contains('d')); - - assert!(!tree.contains('e')); - assert!(!tree.contains('f')); - assert!(!tree.contains('g')); - - assert!(tree.contains('h')); - assert!(tree.contains('i')); - assert!(tree.contains('j')); - assert!(tree.contains('k')); - assert!(tree.contains('l')); - - assert!(!tree.contains('m')); - assert!(!tree.contains('n')); - assert!(!tree.contains('o')); - - assert!(tree.contains('p')); - assert!(tree.contains('q')); - assert!(tree.contains('r')); - - assert!(!tree.contains('s')); - assert!(!tree.contains('s')); - - assert!(tree.contains('u')); - - assert!(!tree.contains('v')); - } -} diff --git a/cli/src/generate/dedup.rs b/cli/src/generate/dedup.rs index dcba231..fffe267 100644 --- a/cli/src/generate/dedup.rs +++ b/cli/src/generate/dedup.rs @@ -1,9 +1,9 @@ -pub(crate) fn split_state_id_groups( - states: &Vec, +pub fn split_state_id_groups( + states: &[S], state_ids_by_group_id: &mut Vec>, - group_ids_by_state_id: &mut Vec, + group_ids_by_state_id: &mut [usize], start_group_id: usize, - mut f: impl FnMut(&S, &S, &Vec) -> bool, + mut f: impl FnMut(&S, &S, &[usize]) -> bool, ) -> bool { let mut result = false; @@ -33,7 +33,7 @@ pub(crate) fn split_state_id_groups( } let right_state = 
&states[right_state_id]; - if f(left_state, right_state, &group_ids_by_state_id) { + if f(left_state, right_state, group_ids_by_state_id) { split_state_ids.push(right_state_id); } @@ -44,9 +44,9 @@ pub(crate) fn split_state_id_groups( } // If any states were removed from the group, add them all as a new group. - if split_state_ids.len() > 0 { + if !split_state_ids.is_empty() { result = true; - state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(&i)); + state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(i)); let new_group_id = state_ids_by_group_id.len(); for id in &split_state_ids { diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 4281cee..3b9b182 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -23,7 +23,7 @@ function alias(rule, value) { } } - throw new Error('Invalid alias value ' + value); + throw new Error(`Invalid alias value ${value}`); } function blank() { @@ -35,7 +35,7 @@ function blank() { function field(name, rule) { return { type: "FIELD", - name: name, + name, content: normalize(rule) } } @@ -48,13 +48,14 @@ function choice(...elements) { } function optional(value) { - checkArguments(arguments.length, optional, 'optional'); + checkArguments(arguments, arguments.length, optional, 'optional'); return choice(value, blank()); } function prec(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec, 'prec', @@ -76,6 +77,7 @@ prec.left = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.left, 'prec.left', @@ -97,6 +99,7 @@ prec.right = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.right, 'prec.right', @@ -113,6 +116,7 @@ prec.right = function(number, rule) { prec.dynamic = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.dynamic, 'prec.dynamic', @@ -127,7 +131,7 @@ prec.dynamic = function(number, rule) { } function repeat(rule) { - checkArguments(arguments.length, repeat, 'repeat'); + checkArguments(arguments, arguments.length, repeat, 'repeat'); return { type: "REPEAT", content: normalize(rule) @@ -135,7 +139,7 @@ function repeat(rule) { } function repeat1(rule) { - checkArguments(arguments.length, repeat1, 'repeat1'); + checkArguments(arguments, arguments.length, repeat1, 'repeat1'); return { type: "REPEAT1", content: normalize(rule) @@ -152,11 +156,12 @@ function seq(...elements) { function sym(name) { return { type: "SYMBOL", - name: name + name }; } function token(value) { + checkArguments(arguments, arguments.length, token, 'token', '', 'literal'); return { type: "TOKEN", content: normalize(value) @@ -164,6 +169,7 @@ function token(value) { } token.immediate = function(value) { + checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal'); return { type: "IMMEDIATE_TOKEN", content: normalize(value) @@ -181,7 +187,11 @@ function normalize(value) { value }; case RegExp: - return { + return value.flags ? 
{ + type: 'PATTERN', + value: value.source, + flags: value.flags + } : { type: 'PATTERN', value: value.source }; @@ -191,17 +201,17 @@ function normalize(value) { if (typeof value.type === 'string') { return value; } else { - throw new TypeError("Invalid rule: " + value.toString()); + throw new TypeError(`Invalid rule: ${value}`); } } } function RuleBuilder(ruleMap) { return new Proxy({}, { - get(target, propertyName) { + get(_, propertyName) { const symbol = sym(propertyName); - if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) { + if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) { return symbol; } else { const error = new ReferenceError(`Undefined symbol '${propertyName}'`); @@ -213,6 +223,8 @@ function RuleBuilder(ruleMap) { } function grammar(baseGrammar, options) { + let inherits = null; + if (!options) { options = baseGrammar; baseGrammar = { @@ -225,6 +237,9 @@ function grammar(baseGrammar, options) { supertypes: [], precedences: [], }; + } else { + baseGrammar = baseGrammar.grammar; + inherits = baseGrammar.name; } let externals = baseGrammar.externals; @@ -244,10 +259,10 @@ function grammar(baseGrammar, options) { } const ruleMap = {}; - for (const key in options.rules) { + for (const key of Object.keys(options.rules)) { ruleMap[key] = true; } - for (const key in baseGrammar.rules) { + for (const key of Object.keys(baseGrammar.rules)) { ruleMap[key] = true; } for (const external of externals) { @@ -267,16 +282,24 @@ function grammar(baseGrammar, options) { throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); } - let rules = Object.assign({}, baseGrammar.rules); + if (inherits && typeof inherits !== "string") { + throw new Error("Base grammar's 'name' property must be a string."); + } + + if (inherits && !/^[a-zA-Z_]\w*$/.test(name)) { + throw new Error("Base grammar's 'name' property must not start with a digit and cannot contain non-word characters."); + } + + const rules = Object.assign({}, baseGrammar.rules); if (options.rules) { if (typeof options.rules !== "object") { throw new Error("Grammar's 'rules' property must be an object."); } - for (const ruleName in options.rules) { + for (const ruleName of Object.keys(options.rules)) { const ruleFn = options.rules[ruleName]; if (typeof ruleFn !== "function") { - throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not."); + throw new Error(`Grammar rules must all be functions. 
'${ruleName}' rule is not.`); } rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); } @@ -304,6 +327,10 @@ function grammar(baseGrammar, options) { if (typeof word != 'string') { throw new Error("Grammar's 'word' property must be a named rule."); } + + if (word === 'ReferenceError') { + throw new Error("Grammar's 'word' property must be a valid rule name."); + } } let conflicts = baseGrammar.conflicts; @@ -341,7 +368,17 @@ function grammar(baseGrammar, options) { throw new Error("Grammar's inline must be an array of rules."); } - inline = inlineRules.map(symbol => symbol.name); + inline = inlineRules.filter((symbol, index, self) => { + if (self.findIndex(s => s.name === symbol.name) !== index) { + console.log(`Warning: duplicate inline rule '${symbol.name}'`); + return false; + } + if (symbol.name === 'ReferenceError') { + console.log(`Warning: inline rule '${symbol.symbol.name}' is not defined.`); + return false; + } + return true; + }).map(symbol => symbol.name); } let supertypes = baseGrammar.supertypes; @@ -377,18 +414,36 @@ function grammar(baseGrammar, options) { }); } - if (Object.keys(rules).length == 0) { + if (Object.keys(rules).length === 0) { throw new Error("Grammar must have at least one rule."); } - return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes}; + return { + grammar: { + name, + ...(inherits ? ( inherits ) : {}), + word, + rules, + extras, + conflicts, + precedences, + externals, + inline, + supertypes, + }, + }; } -function checkArguments(ruleCount, caller, callerName, suffix = '') { - if (ruleCount > 1) { +function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') { + // Allow for .map() usage where additional arguments are index and the entire array. + const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]); + if (isMapCall) { + ruleCount = typeof args[2] === 'number' ? 1 : args[2].length; + } + if (ruleCount > 1 && !isMapCall) { const error = new Error([ - `The \`${callerName}\` function only takes one rule argument${suffix}.`, - 'You passed multiple rules. Did you mean to call `seq`?\n' + `The \`${callerName}\` function only takes one ${argType} argument${suffix}.`, + `You passed in multiple ${argType}s. Did you mean to call \`seq\`?\n` ].join('\n')); Error.captureStackTrace(error, caller); throw error @@ -415,4 +470,4 @@ global.grammar = grammar; global.field = field; const result = require(process.env.TREE_SITTER_GRAMMAR_PATH); -console.log(JSON.stringify(result, null, 2)); +process.stdout.write(JSON.stringify(result.grammar, null, null)); diff --git a/cli/src/generate/grammar-schema.json b/cli/src/generate/grammar-schema.json index 5ca3537..1ed8395 100644 --- a/cli/src/generate/grammar-schema.json +++ b/cli/src/generate/grammar-schema.json @@ -14,6 +14,12 @@ "pattern": "^[a-zA-Z_]\\w*" }, + "inherits": { + "description": "the name of the parent grammar", + "type": "string", + "pattern": "^[a-zA-Z_]\\w*" + }, + "rules": { "type": "object", "patternProperties": { @@ -31,6 +37,16 @@ } }, + "precedences": { + "type": "array", + "items": { + "type": "array", + "items": { + "$ref": "#/definitions/rule" + } + } + }, + "externals": { "type": "array", "items": { @@ -63,7 +79,7 @@ }, "supertypes": { - "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. 
See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", "type": "array", "items": { "description": "the name of a rule in `rules` or `extras`", @@ -105,7 +121,8 @@ "type": "string", "pattern": "^PATTERN$" }, - "value": { "type": "string" } + "value": { "type": "string" }, + "flags": { "type": "string" } }, "required": ["type", "value"] }, @@ -240,7 +257,10 @@ "pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$" }, "value": { - "type": "integer" + "oneof": [ + { "type": "integer" }, + { "type": "string" } + ] }, "content": { "$ref": "#/definitions/rule" diff --git a/cli/src/generate/grammar_files.rs b/cli/src/generate/grammar_files.rs new file mode 100644 index 0000000..6ad9cdc --- /dev/null +++ b/cli/src/generate/grammar_files.rs @@ -0,0 +1,596 @@ +use std::{ + fs, + fs::File, + io::BufReader, + path::{Path, PathBuf}, + str, +}; + +use anyhow::{anyhow, Context, Result}; +use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase}; +use indoc::indoc; +use serde::Deserialize; +use serde_json::{json, Map, Value}; + +use super::write_file; + +const CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); +const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION"; + +const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME"; +const CAMEL_PARSER_NAME_PLACEHOLDER: &str = "CAMEL_PARSER_NAME"; +const UPPER_PARSER_NAME_PLACEHOLDER: &str = "UPPER_PARSER_NAME"; +const LOWER_PARSER_NAME_PLACEHOLDER: &str = "LOWER_PARSER_NAME"; + +const GRAMMAR_JS_TEMPLATE: &str = include_str!("./templates/grammar.js"); +const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json"); +const GITIGNORE_TEMPLATE: &str = include_str!("./templates/gitignore"); +const GITATTRIBUTES_TEMPLATE: &str = include_str!("./templates/gitattributes"); +const EDITORCONFIG_TEMPLATE: &str = include_str!("./templates/.editorconfig"); + +const RUST_BINDING_VERSION: &str = env!("CARGO_PKG_VERSION"); +const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION"; + +const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs"); +const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs"); +const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/cargo.toml"); + +const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js"); +const INDEX_D_TS_TEMPLATE: &str = include_str!("./templates/index.d.ts"); +const JS_BINDING_CC_TEMPLATE: &str = include_str!("./templates/js-binding.cc"); +const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp"); + +const MAKEFILE_TEMPLATE: &str = include_str!("./templates/makefile"); +const PARSER_NAME_H_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.h"); +const PARSER_NAME_PC_IN_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.pc.in"); + +const GO_MOD_TEMPLATE: &str = include_str!("./templates/go.mod"); +const BINDING_GO_TEMPLATE: &str = include_str!("./templates/binding.go"); +const BINDING_GO_TEST_TEMPLATE: &str = include_str!("./templates/binding_test.go"); + +const SETUP_PY_TEMPLATE: &str = include_str!("./templates/setup.py"); +const INIT_PY_TEMPLATE: &str = include_str!("./templates/__init__.py"); +const INIT_PYI_TEMPLATE: &str = include_str!("./templates/__init__.pyi"); +const PYPROJECT_TOML_TEMPLATE: &str = include_str!("./templates/pyproject.toml"); +const PY_BINDING_C_TEMPLATE: &str = 
include_str!("./templates/py-binding.c"); + +const PACKAGE_SWIFT_TEMPLATE: &str = include_str!("./templates/Package.swift"); + +#[derive(Deserialize, Debug)] +struct LanguageConfiguration {} + +#[derive(Deserialize, Debug)] +pub struct PackageJSON { + #[serde(rename = "tree-sitter")] + tree_sitter: Option>, +} + +pub fn path_in_ignore(repo_path: &Path) -> bool { + [ + "bindings", + "build", + "examples", + "node_modules", + "queries", + "script", + "src", + "target", + "test", + "types", + ] + .iter() + .any(|dir| repo_path.ends_with(dir)) +} + +fn insert_after( + map: Map, + after: &str, + key: &str, + value: Value, +) -> Map { + let mut entries = map.into_iter().collect::>(); + let after_index = entries + .iter() + .position(|(k, _)| k == after) + .unwrap_or(entries.len() - 1) + + 1; + entries.insert(after_index, (key.to_string(), value)); + entries.into_iter().collect() +} + +pub fn generate_grammar_files( + repo_path: &Path, + language_name: &str, + generate_bindings: bool, +) -> Result<()> { + let dashed_language_name = language_name.to_kebab_case(); + + // TODO: remove legacy code updates in v0.24.0 + + // Create or update package.json + let package_json_path_state = missing_path_else( + repo_path.join("package.json"), + |path| generate_file(path, PACKAGE_JSON_TEMPLATE, dashed_language_name.as_str()), + |path| { + let package_json_str = + fs::read_to_string(path).with_context(|| "Failed to read package.json")?; + let mut package_json = serde_json::from_str::>(&package_json_str) + .with_context(|| "Failed to parse package.json")?; + if generate_bindings { + let mut updated = false; + + let dependencies = package_json + .entry("dependencies".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + if dependencies.remove("nan").is_some() { + eprintln!("Replacing nan dependency with node-addon-api in package.json"); + dependencies.insert("node-addon-api".to_string(), "^7.1.0".into()); + updated = true; + } + if !dependencies.contains_key("node-gyp-build") { + eprintln!("Adding node-gyp-build dependency to package.json"); + dependencies.insert("node-gyp-build".to_string(), "^4.8.0".into()); + updated = true; + } + + let dev_dependencies = package_json + .entry("devDependencies".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + if !dev_dependencies.contains_key("prebuildify") { + eprintln!("Adding prebuildify devDependency to package.json"); + dev_dependencies.insert("prebuildify".to_string(), "^6.0.0".into()); + updated = true; + } + + let scripts = package_json + .entry("scripts".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + match scripts.get("install") { + None => { + eprintln!("Adding an install script to package.json"); + scripts.insert("install".to_string(), "node-gyp-build".into()); + updated = true; + } + Some(Value::String(v)) if v != "node-gyp-build" => { + eprintln!("Updating the install script in package.json"); + scripts.insert("install".to_string(), "node-gyp-build".into()); + updated = true; + } + Some(_) => {} + } + if !scripts.contains_key("prebuildify") { + eprintln!("Adding a prebuildify script to package.json"); + scripts.insert( + "prebuildify".to_string(), + "prebuildify --napi --strip".into(), + ); + updated = true; + } + + // insert `peerDependencies` after `dependencies` + if !package_json.contains_key("peerDependencies") { + eprintln!("Adding peerDependencies to package.json"); + package_json = insert_after( + package_json, + 
"dependencies", + "peerDependencies", + json!({"tree-sitter": "^0.21.0"}), + ); + + package_json = insert_after( + package_json, + "peerDependencies", + "peerDependenciesMeta", + json!({"tree_sitter": {"optional": true}}), + ); + updated = true; + } + + // insert `types` right after `main` + if !package_json.contains_key("types") { + eprintln!("Adding types to package.json"); + package_json = + insert_after(package_json, "main", "types", "bindings/node".into()); + updated = true; + } + + // insert `files` right after `keywords` + if !package_json.contains_key("files") { + eprintln!("Adding files to package.json"); + package_json = insert_after( + package_json, + "keywords", + "files", + json!([ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**", + ]), + ); + updated = true; + } + + // insert `tree-sitter` at the end + if !package_json.contains_key("tree-sitter") { + eprintln!("Adding a `tree-sitter` section to package.json"); + package_json.insert( + "tree-sitter".to_string(), + json!([{ + "scope": format!("source.{language_name}"), + "injection-regex": format!("^{language_name}$"), + }]), + ); + updated = true; + } + + if updated { + let mut package_json_str = serde_json::to_string_pretty(&package_json)?; + package_json_str.push('\n'); + write_file(path, package_json_str)?; + } + } + + Ok(()) + }, + )?; + + let package_json = match lookup_package_json_for_path(package_json_path_state.as_path()) { + Ok((_, p)) => p, + Err(e) if generate_bindings => return Err(e), + _ => return Ok(()), + }; + + // Do not create a grammar.js file in a repo with multiple language configs + if !package_json.has_multiple_language_configs() { + missing_path(repo_path.join("grammar.js"), |path| { + generate_file(path, GRAMMAR_JS_TEMPLATE, language_name) + })?; + } + + if !generate_bindings { + // our job is done + return Ok(()); + } + + // Write .gitignore file + missing_path(repo_path.join(".gitignore"), |path| { + generate_file(path, GITIGNORE_TEMPLATE, language_name) + })?; + + // Write .gitattributes file + missing_path(repo_path.join(".gitattributes"), |path| { + generate_file(path, GITATTRIBUTES_TEMPLATE, language_name) + })?; + + // Write .editorconfig file + missing_path(repo_path.join(".editorconfig"), |path| { + generate_file(path, EDITORCONFIG_TEMPLATE, language_name) + })?; + + let bindings_dir = repo_path.join("bindings"); + + // Generate Rust bindings + missing_path(bindings_dir.join("rust"), create_dir)?.apply(|path| { + missing_path(path.join("lib.rs"), |path| { + generate_file(path, LIB_RS_TEMPLATE, language_name) + })?; + + missing_path_else( + path.join("build.rs"), + |path| generate_file(path, BUILD_RS_TEMPLATE, language_name), + |path| { + let build_rs = + fs::read_to_string(path).with_context(|| "Failed to read build.rs")?; + if !build_rs.contains("-utf-8") { + let index = build_rs + .find(" let parser_path = src_dir.join(\"parser.c\")") + .ok_or_else(|| anyhow!(indoc!{ + "Failed to auto-update build.rs with the `/utf-8` flag for windows. + To fix this, remove `bindings/rust/build.rs` and re-run `tree-sitter generate`"}))?; + + let build_rs = format!( + "{}{}{}\n{}", + &build_rs[..index], + " #[cfg(target_env = \"msvc\")]\n", + " c_config.flag(\"-utf-8\");\n", + &build_rs[index..] 
+ ); + + write_file(path, build_rs)?; + eprintln!("Updated build.rs with the /utf-8 flag for Windows compilation"); + } + Ok(()) + }, + )?; + + missing_path(repo_path.join("Cargo.toml"), |path| { + generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name.as_str()) + })?; + + Ok(()) + })?; + + // Generate Node bindings + missing_path(bindings_dir.join("node"), create_dir)?.apply(|path| { + missing_path_else( + path.join("index.js"), + |path| generate_file(path, INDEX_JS_TEMPLATE, language_name), + |path| { + let index_js = + fs::read_to_string(path).with_context(|| "Failed to read index.js")?; + if index_js.contains("../../build/Release") { + eprintln!("Replacing index.js with new binding API"); + generate_file(path, INDEX_JS_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + missing_path(path.join("index.d.ts"), |path| { + generate_file(path, INDEX_D_TS_TEMPLATE, language_name) + })?; + + missing_path_else( + path.join("binding.cc"), + |path| generate_file(path, JS_BINDING_CC_TEMPLATE, language_name), + |path| { + let binding_cc = + fs::read_to_string(path).with_context(|| "Failed to read binding.cc")?; + if binding_cc.contains("NAN_METHOD(New) {}") { + eprintln!("Replacing binding.cc with new binding API"); + generate_file(path, JS_BINDING_CC_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + // Create binding.gyp, or update it with new binding API. + missing_path_else( + repo_path.join("binding.gyp"), + |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name), + |path| { + let binding_gyp = + fs::read_to_string(path).with_context(|| "Failed to read binding.gyp")?; + if binding_gyp.contains("require('nan')") { + eprintln!("Replacing binding.gyp with new binding API"); + generate_file(path, BINDING_GYP_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + Ok(()) + })?; + + // Generate C bindings + missing_path(bindings_dir.join("c"), create_dir)?.apply(|path| { + missing_path( + path.join(format!("tree-sitter-{language_name}.h")), + |path| generate_file(path, PARSER_NAME_H_TEMPLATE, language_name), + )?; + + missing_path( + path.join(format!("tree-sitter-{language_name}.pc.in")), + |path| generate_file(path, PARSER_NAME_PC_IN_TEMPLATE, language_name), + )?; + + missing_path(repo_path.join("Makefile"), |path| { + generate_file(path, MAKEFILE_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + // Generate Go bindings + missing_path(bindings_dir.join("go"), create_dir)?.apply(|path| { + missing_path(path.join("binding.go"), |path| { + generate_file(path, BINDING_GO_TEMPLATE, language_name) + })?; + + missing_path(path.join("binding_test.go"), |path| { + generate_file(path, BINDING_GO_TEST_TEMPLATE, language_name) + })?; + + missing_path(path.join("go.mod"), |path| { + generate_file(path, GO_MOD_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + // Generate Python bindings + missing_path(bindings_dir.join("python"), create_dir)?.apply(|path| { + let lang_path = path.join(format!("tree_sitter_{}", language_name.to_snake_case())); + missing_path(&lang_path, create_dir)?; + + missing_path(lang_path.join("binding.c"), |path| { + generate_file(path, PY_BINDING_C_TEMPLATE, language_name) + })?; + + missing_path(lang_path.join("__init__.py"), |path| { + generate_file(path, INIT_PY_TEMPLATE, language_name) + })?; + + missing_path(lang_path.join("__init__.pyi"), |path| { + generate_file(path, INIT_PYI_TEMPLATE, language_name) + })?; + + missing_path(lang_path.join("py.typed"), |path| { + generate_file(path, "", language_name) // py.typed is empty + })?; + + 
missing_path(repo_path.join("setup.py"), |path| { + generate_file(path, SETUP_PY_TEMPLATE, language_name) + })?; + + missing_path(repo_path.join("pyproject.toml"), |path| { + generate_file(path, PYPROJECT_TOML_TEMPLATE, dashed_language_name.as_str()) + })?; + + Ok(()) + })?; + + // Generate Swift bindings + missing_path(bindings_dir.join("swift"), create_dir)?.apply(|path| { + let lang_path = path.join(format!("TreeSitter{}", language_name.to_upper_camel_case())); + missing_path(&lang_path, create_dir)?; + + missing_path(lang_path.join(format!("{language_name}.h")), |path| { + generate_file(path, PARSER_NAME_H_TEMPLATE, language_name) + })?; + + missing_path(repo_path.join("Package.swift"), |path| { + generate_file(path, PACKAGE_SWIFT_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + Ok(()) +} + +pub fn lookup_package_json_for_path(path: &Path) -> Result<(PathBuf, PackageJSON)> { + let mut pathbuf = path.to_owned(); + loop { + let package_json = pathbuf + .exists() + .then(|| -> Result { + let file = + File::open(pathbuf.as_path()).with_context(|| "Failed to open package.json")?; + serde_json::from_reader(BufReader::new(file)).context( + "Failed to parse package.json, is the `tree-sitter` section malformed?", + ) + }) + .transpose()?; + if let Some(package_json) = package_json { + if package_json.tree_sitter.is_some() { + return Ok((pathbuf, package_json)); + } + } + pathbuf.pop(); // package.json + if !pathbuf.pop() { + return Err(anyhow!(concat!( + "Failed to locate a package.json file that has a \"tree-sitter\" section,", + " please ensure you have one, and if you don't then consult the docs", + ))); + } + pathbuf.push("package.json"); + } +} + +fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> { + write_file( + path, + template + .replace( + CAMEL_PARSER_NAME_PLACEHOLDER, + &language_name.to_upper_camel_case(), + ) + .replace( + UPPER_PARSER_NAME_PLACEHOLDER, + &language_name.to_shouty_snake_case(), + ) + .replace( + LOWER_PARSER_NAME_PLACEHOLDER, + &language_name.to_snake_case(), + ) + .replace(PARSER_NAME_PLACEHOLDER, language_name) + .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION) + .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION), + ) +} + +fn create_dir(path: &Path) -> Result<()> { + fs::create_dir_all(path) + .with_context(|| format!("Failed to create {:?}", path.to_string_lossy())) +} + +#[derive(PartialEq, Eq, Debug)] +enum PathState

<P>
+where
+    P: AsRef<Path>,
+{
+    Exists(P),
+    Missing(P),
+}
+
+#[allow(dead_code)]
+impl<P> PathState<P>
+where
+    P: AsRef<Path>,
+{
+    fn exists(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
+        if let Self::Exists(path) = self {
+            action(path.as_ref())?;
+        }
+        Ok(self)
+    }
+
+    fn missing(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
+        if let Self::Missing(path) = self {
+            action(path.as_ref())?;
+        }
+        Ok(self)
+    }
+
+    fn apply(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> {
+        action(self.as_path())?;
+        Ok(self)
+    }
+
+    fn apply_state(&self, mut action: impl FnMut(&Self) -> Result<()>) -> Result<&Self> {
+        action(self)?;
+        Ok(self)
+    }
+
+    fn as_path(&self) -> &Path {
+        match self {
+            Self::Exists(path) | Self::Missing(path) => path.as_ref(),
+        }
+    }
+}
+
+fn missing_path<P, F>(path: P, mut action: F) -> Result<PathState<P>>
+where
+    P: AsRef<Path>,
+    F: FnMut(&Path) -> Result<()>,
+{
+    let path_ref = path.as_ref();
+    if !path_ref.exists() {
+        action(path_ref)?;
+        Ok(PathState::Missing(path))
+    } else {
+        Ok(PathState::Exists(path))
+    }
+}
+
+fn missing_path_else<P, T, F>(path: P, mut action: T, mut else_action: F) -> Result<PathState<P>>
+where
+    P: AsRef<Path>,
+    T: FnMut(&Path) -> Result<()>,
+    F: FnMut(&Path) -> Result<()>,
+{
+    let path_ref = path.as_ref();
+    if !path_ref.exists() {
+        action(path_ref)?;
+        Ok(PathState::Missing(path))
+    } else {
+        else_action(path_ref)?;
+        Ok(PathState::Exists(path))
+    }
+}
+
+impl PackageJSON {
+    fn has_multiple_language_configs(&self) -> bool {
+        self.tree_sitter.as_ref().is_some_and(|c| c.len() > 1)
+    }
+}
diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs
index db8d852..1f3b907 100644
--- a/cli/src/generate/grammars.rs
+++ b/cli/src/generate/grammars.rs
@@ -1,10 +1,12 @@
-use super::nfa::Nfa;
-use super::rules::{Alias, Associativity, Precedence, Rule, Symbol};
-use std::collections::HashMap;
-use std::fmt;
+use std::{collections::HashMap, fmt};
+
+use super::{
+    nfa::Nfa,
+    rules::{Alias, Associativity, Precedence, Rule, Symbol},
+};
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) enum VariableType {
+pub enum VariableType {
     Hidden,
     Auxiliary,
     Anonymous,
@@ -14,20 +16,20 @@ pub(crate) enum VariableType {
 
 // Input grammar
 
 #[derive(Clone, Debug, PartialEq, Eq)]
-pub(crate) struct Variable {
+pub struct Variable {
     pub name: String,
     pub kind: VariableType,
     pub rule: Rule,
 }
 
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub(crate) enum PrecedenceEntry {
+pub enum PrecedenceEntry {
     Name(String),
     Symbol(String),
 }
 
 #[derive(Debug, Default, PartialEq, Eq)]
-pub(crate) struct InputGrammar {
+pub struct InputGrammar {
     pub name: String,
     pub variables: Vec<Variable>,
     pub extra_symbols: Vec<Rule>,
@@ -42,7 +44,7 @@ pub(crate) struct InputGrammar {
 
 // Extracted lexical grammar
 
 #[derive(Debug, PartialEq, Eq)]
-pub(crate) struct LexicalVariable {
+pub struct LexicalVariable {
     pub name: String,
     pub kind: VariableType,
     pub implicit_precedence: i32,
@@ -50,7 +52,7 @@ pub(crate) struct LexicalVariable {
 }
 
 #[derive(Debug, Default, PartialEq, Eq)]
-pub(crate) struct LexicalGrammar {
+pub struct LexicalGrammar {
     pub nfa: Nfa,
     pub variables: Vec<LexicalVariable>,
 }
 
@@ -58,7 +60,7 @@ pub(crate) struct LexicalGrammar {
 
 // Extracted syntax grammar
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
-pub(crate) struct ProductionStep {
+pub struct ProductionStep {
     pub symbol: Symbol,
     pub precedence: Precedence,
     pub associativity: Option<Associativity>,
@@ -67,33 +69,33 @@ pub(crate) struct ProductionStep {
 }
 
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub(crate) struct Production {
+pub struct Production {
     pub steps: Vec<ProductionStep>,
     pub
dynamic_precedence: i32, } #[derive(Default)] -pub(crate) struct InlinedProductionMap { +pub struct InlinedProductionMap { pub productions: Vec, pub production_map: HashMap<(*const Production, u32), Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct SyntaxVariable { +pub struct SyntaxVariable { pub name: String, pub kind: VariableType, pub productions: Vec, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ExternalToken { +pub struct ExternalToken { pub name: String, pub kind: VariableType, pub corresponding_internal_token: Option, } #[derive(Debug, Default)] -pub(crate) struct SyntaxGrammar { +pub struct SyntaxGrammar { pub variables: Vec, pub extra_symbols: Vec, pub expected_conflicts: Vec>, @@ -106,7 +108,7 @@ pub(crate) struct SyntaxGrammar { #[cfg(test)] impl ProductionStep { - pub(crate) fn new(symbol: Symbol) -> Self { + pub const fn new(symbol: Symbol) -> Self { Self { symbol, precedence: Precedence::None, @@ -116,11 +118,7 @@ impl ProductionStep { } } - pub(crate) fn with_prec( - self, - precedence: Precedence, - associativity: Option, - ) -> Self { + pub fn with_prec(self, precedence: Precedence, associativity: Option) -> Self { Self { symbol: self.symbol, precedence, @@ -130,7 +128,7 @@ impl ProductionStep { } } - pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self { + pub fn with_alias(self, value: &str, is_named: bool) -> Self { Self { symbol: self.symbol, precedence: self.precedence, @@ -142,7 +140,7 @@ impl ProductionStep { field_name: self.field_name, } } - pub(crate) fn with_field_name(self, name: &str) -> Self { + pub fn with_field_name(self, name: &str) -> Self { Self { symbol: self.symbol, precedence: self.precedence, @@ -155,7 +153,7 @@ impl ProductionStep { impl Production { pub fn first_symbol(&self) -> Option { - self.steps.first().map(|s| s.symbol.clone()) + self.steps.first().map(|s| s.symbol) } } @@ -195,24 +193,24 @@ impl Variable { } impl VariableType { - pub fn is_visible(&self) -> bool { - *self == VariableType::Named || *self == VariableType::Anonymous + pub fn is_visible(self) -> bool { + self == Self::Named || self == Self::Anonymous } } impl LexicalGrammar { pub fn variable_indices_for_nfa_states<'a>( &'a self, - state_ids: &'a Vec, + state_ids: &'a [u32], ) -> impl Iterator + 'a { let mut prev = None; state_ids.iter().filter_map(move |state_id| { let variable_id = self.variable_index_for_nfa_state(*state_id); - if prev != Some(variable_id) { + if prev == Some(variable_id) { + None + } else { prev = Some(variable_id); prev - } else { - None } }) } @@ -246,7 +244,7 @@ impl InlinedProductionMap { .map(|production_indices| { production_indices .iter() - .cloned() + .copied() .map(move |index| &self.productions[index]) }) } @@ -255,8 +253,8 @@ impl InlinedProductionMap { impl fmt::Display for PrecedenceEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - PrecedenceEntry::Name(n) => write!(f, "'{}'", n), - PrecedenceEntry::Symbol(s) => write!(f, "$.{}", s), + Self::Name(n) => write!(f, "'{n}'"), + Self::Symbol(s) => write!(f, "$.{s}"), } } } diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 4838828..1d3b9e4 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -1,7 +1,24 @@ -mod binding_files; +use std::{ + env, fs, + io::Write, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; + +use anyhow::{anyhow, Context, Result}; +use build_tables::build_tables; +use grammar_files::path_in_ignore; +use grammars::InputGrammar; +use lazy_static::lazy_static; +use 
parse_grammar::parse_grammar; +use prepare_grammar::prepare_grammar; +use regex::{Regex, RegexBuilder}; +use render::render_c_code; +use semver::Version; + mod build_tables; -mod char_tree; mod dedup; +mod grammar_files; mod grammars; mod nfa; mod node_types; @@ -11,20 +28,7 @@ mod render; mod rules; mod tables; -use self::build_tables::build_tables; -use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use self::parse_grammar::parse_grammar; -use self::prepare_grammar::prepare_grammar; -use self::render::render_c_code; -use self::rules::AliasMap; -use anyhow::{anyhow, Context, Result}; -use lazy_static::lazy_static; -use regex::{Regex, RegexBuilder}; -use semver::Version; -use std::fs; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +pub use grammar_files::lookup_package_json_for_path; lazy_static! { static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") @@ -38,13 +42,55 @@ struct GeneratedParser { node_types_json: String, } +pub const ALLOC_HEADER: &str = include_str!("./templates/alloc.h"); + pub fn generate_parser_in_directory( - repo_path: &PathBuf, + repo_path: &Path, grammar_path: Option<&str>, abi_version: usize, generate_bindings: bool, report_symbol_name: Option<&str>, + js_runtime: Option<&str>, ) -> Result<()> { + let mut repo_path = repo_path.to_owned(); + let mut grammar_path = grammar_path; + + // Populate a new empty grammar directory. + if let Some(path) = grammar_path { + let path = PathBuf::from(path); + if !path + .try_exists() + .with_context(|| "Some error with specified path")? + { + fs::create_dir_all(&path)?; + grammar_path = None; + repo_path = path; + } + } + + if repo_path.is_dir() && !repo_path.join("grammar.js").exists() && !path_in_ignore(&repo_path) { + if let Some(dir_name) = repo_path + .file_name() + .map(|x| x.to_string_lossy().to_ascii_lowercase()) + { + if let Some(language_name) = dir_name + .strip_prefix("tree-sitter-") + .or_else(|| Some(dir_name.as_ref())) + { + grammar_files::generate_grammar_files(&repo_path, language_name, false)?; + } + } + } + + // Read the grammar.json. + let grammar_json = if let Some(path) = grammar_path { + load_grammar_file(path.as_ref(), js_runtime)? + } else { + let grammar_js_path = + grammar_path.map_or(repo_path.join("grammar.js"), std::convert::Into::into); + load_grammar_file(&grammar_js_path, js_runtime)? + }; + let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); @@ -52,45 +98,28 @@ pub fn generate_parser_in_directory( fs::create_dir_all(&src_path)?; fs::create_dir_all(&header_path)?; - // Read the grammar.json. - let grammar_json; - match grammar_path { - Some(path) => { - grammar_json = load_grammar_file(path.as_ref())?; - } - None => { - let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); - grammar_json = load_grammar_file(&grammar_js_path)?; - fs::write(&src_path.join("grammar.json"), &grammar_json)?; - } + if grammar_path.is_none() { + fs::write(src_path.join("grammar.json"), &grammar_json) + .with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?; } // Parse and preprocess the grammar. let input_grammar = parse_grammar(&grammar_json)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; - let language_name = input_grammar.name; // Generate the parser and related files. 
let GeneratedParser { c_code, node_types_json, - } = generate_parser_for_grammar_with_opts( - &language_name, - syntax_grammar, - lexical_grammar, - inlines, - simple_aliases, - abi_version, - report_symbol_name, - )?; + } = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; + write_file(&header_path.join("array.h"), tree_sitter::ARRAY_HEADER)?; write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; - if generate_bindings { - binding_files::generate_binding_files(&repo_path, &language_name)?; + if !path_in_ignore(&repo_path) { + grammar_files::generate_grammar_files(&repo_path, &input_grammar.name, generate_bindings)?; } Ok(()) @@ -99,29 +128,18 @@ pub fn generate_parser_in_directory( pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); let input_grammar = parse_grammar(&grammar_json)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; - let parser = generate_parser_for_grammar_with_opts( - &input_grammar.name, - syntax_grammar, - lexical_grammar, - inlines, - simple_aliases, - tree_sitter::LANGUAGE_VERSION, - None, - )?; - Ok((input_grammar.name, parser.c_code)) + let parser = + generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?; + Ok((input_grammar.name.clone(), parser.c_code)) } fn generate_parser_for_grammar_with_opts( - name: &String, - syntax_grammar: SyntaxGrammar, - lexical_grammar: LexicalGrammar, - inlines: InlinedProductionMap, - simple_aliases: AliasMap, + input_grammar: &InputGrammar, abi_version: usize, report_symbol_name: Option<&str>, ) -> Result { + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(input_grammar)?; let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?; let node_types_json = node_types::generate_node_types_json( @@ -130,7 +148,7 @@ fn generate_parser_for_grammar_with_opts( &simple_aliases, &variable_info, ); - let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( + let tables = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, @@ -139,11 +157,8 @@ fn generate_parser_for_grammar_with_opts( report_symbol_name, )?; let c_code = render_c_code( - name, - parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, + &input_grammar.name, + tables, syntax_grammar, lexical_grammar, simple_aliases, @@ -155,32 +170,40 @@ fn generate_parser_for_grammar_with_opts( }) } -pub fn load_grammar_file(grammar_path: &Path) -> Result { +pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { + if grammar_path.is_dir() { + return Err(anyhow!( + "Path to a grammar file with `.js` or `.json` extension is required" + )); + } match grammar_path.extension().and_then(|e| e.to_str()) { - Some("js") => Ok(load_js_grammar_file(grammar_path)?), - Some("json") => Ok(fs::read_to_string(grammar_path)?), - _ => Err(anyhow!( - "Unknown grammar file extension: {:?}", - grammar_path - )), + Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime) + .with_context(|| "Failed to load grammar.js")?), + Some("json") => { + Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load 
grammar.json")?) + } + _ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)), } } -fn load_js_grammar_file(grammar_path: &Path) -> Result { +fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { let grammar_path = fs::canonicalize(grammar_path)?; - let mut node_process = Command::new("node") + + let js_runtime = js_runtime.unwrap_or("node"); + + let mut node_process = Command::new(js_runtime) .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() - .expect("Failed to run `node`"); + .with_context(|| format!("Failed to run `{js_runtime}`"))?; let mut node_stdin = node_process .stdin .take() - .expect("Failed to open stdin for node"); + .with_context(|| format!("Failed to open stdin for {js_runtime}"))?; let cli_version = Version::parse(env!("CARGO_PKG_VERSION")) - .expect("Could not parse this package's version as semver."); + .with_context(|| "Could not parse this package's version as semver.")?; write!( node_stdin, "global.TREE_SITTER_CLI_VERSION_MAJOR = {}; @@ -188,24 +211,43 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result { global.TREE_SITTER_CLI_VERSION_PATCH = {};", cli_version.major, cli_version.minor, cli_version.patch, ) - .expect("Failed to write tree-sitter version to node's stdin"); + .with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?; let javascript_code = include_bytes!("./dsl.js"); node_stdin .write(javascript_code) - .expect("Failed to write grammar dsl to node's stdin"); + .with_context(|| format!("Failed to write grammar dsl to {js_runtime}'s stdin"))?; drop(node_stdin); let output = node_process .wait_with_output() - .expect("Failed to read output from node"); + .with_context(|| format!("Failed to read output from {js_runtime}"))?; match output.status.code() { - None => panic!("Node process was killed"), - Some(0) => {} - Some(code) => return Err(anyhow!("Node process exited with status {}", code)), - } + None => panic!("{js_runtime} process was killed"), + Some(0) => { + let stdout = String::from_utf8(output.stdout) + .with_context(|| format!("Got invalid UTF8 from {js_runtime}"))?; + + let mut grammar_json = &stdout[..]; + + if let Some(pos) = stdout.rfind('\n') { + // If there's a newline, split the last line from the rest of the output + let node_output = &stdout[..pos]; + grammar_json = &stdout[pos + 1..]; - let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"); - result.push('\n'); - Ok(result) + let mut stdout = std::io::stdout().lock(); + stdout.write_all(node_output.as_bytes())?; + stdout.write_all(b"\n")?; + stdout.flush()?; + } + + Ok(serde_json::to_string_pretty( + &serde_json::from_str::(grammar_json) + .with_context(|| "Failed to parse grammar JSON")?, + ) + .with_context(|| "Failed to serialize grammar JSON")? + + "\n") + } + Some(code) => Err(anyhow!("{js_runtime} process exited with status {code}")), + } } fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> { diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 6be3608..6f0e1ee 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -1,13 +1,14 @@ -use std::char; -use std::cmp::max; -use std::cmp::Ordering; -use std::collections::HashSet; -use std::fmt; -use std::mem::swap; -use std::ops::Range; +use std::{ + char, + cmp::{max, Ordering}, + fmt, + iter::ExactSizeIterator, + mem::{self, swap}, + ops::{Range, RangeInclusive}, +}; /// A set of characters represented as a vector of ranges. 
diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs
index 6be3608..6f0e1ee 100644
--- a/cli/src/generate/nfa.rs
+++ b/cli/src/generate/nfa.rs
@@ -1,13 +1,14 @@
-use std::char;
-use std::cmp::max;
-use std::cmp::Ordering;
-use std::collections::HashSet;
-use std::fmt;
-use std::mem::swap;
-use std::ops::Range;
+use std::{
+    char,
+    cmp::{max, Ordering},
+    fmt,
+    iter::ExactSizeIterator,
+    mem::{self, swap},
+    ops::{Range, RangeInclusive},
+};
 
 /// A set of characters represented as a vector of ranges.
-#[derive(Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, Default, PartialEq, Eq, Hash)]
 pub struct CharacterSet {
     ranges: Vec<Range<u32>>,
 }
@@ -28,7 +29,7 @@ pub enum NfaState {
     },
 }
 
-#[derive(PartialEq, Eq)]
+#[derive(PartialEq, Eq, Default)]
 pub struct Nfa {
     pub states: Vec<NfaState>,
 }
@@ -47,40 +48,36 @@ pub struct NfaTransition {
     pub states: Vec<u32>,
 }
 
-impl Default for Nfa {
-    fn default() -> Self {
-        Self { states: Vec::new() }
-    }
-}
-
 const END: u32 = char::MAX as u32 + 1;
 
 impl CharacterSet {
     /// Create a character set with a single character.
-    pub fn empty() -> Self {
-        CharacterSet { ranges: Vec::new() }
+    pub const fn empty() -> Self {
+        Self { ranges: Vec::new() }
     }
 
     /// Create a character set with a given *inclusive* range of characters.
+    #[allow(clippy::single_range_in_vec_init)]
     pub fn from_range(mut first: char, mut last: char) -> Self {
         if first > last {
             swap(&mut first, &mut last);
         }
-        CharacterSet {
+        Self {
             ranges: vec![(first as u32)..(last as u32 + 1)],
         }
     }
 
     /// Create a character set with a single character.
+    #[allow(clippy::single_range_in_vec_init)]
     pub fn from_char(c: char) -> Self {
-        CharacterSet {
+        Self {
             ranges: vec![(c as u32)..(c as u32 + 1)],
         }
     }
 
     /// Create a character set containing all characters *not* present
     /// in this character set.
-    pub fn negate(mut self) -> CharacterSet {
+    pub fn negate(mut self) -> Self {
         let mut i = 0;
         let mut previous_end = 0;
         while i < self.ranges.len() {
@@ -110,14 +107,19 @@ impl CharacterSet {
         self
     }
 
-    pub fn add(mut self, other: &CharacterSet) -> Self {
+    pub fn add(mut self, other: &Self) -> Self {
         let mut index = 0;
         for range in &other.ranges {
-            index = self.add_int_range(index, range.start as u32, range.end as u32);
+            index = self.add_int_range(index, range.start, range.end);
         }
         self
     }
 
+    pub fn assign(&mut self, other: &Self) {
+        self.ranges.clear();
+        self.ranges.extend_from_slice(&other.ranges);
+    }
+
     fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize {
         while i < self.ranges.len() {
             let range = &mut self.ranges[i];
@@ -143,7 +145,7 @@ impl CharacterSet {
         i
     }
 
-    pub fn does_intersect(&self, other: &CharacterSet) -> bool {
+    pub fn does_intersect(&self, other: &Self) -> bool {
         let mut left_ranges = self.ranges.iter();
         let mut right_ranges = other.ranges.iter();
         let mut left_range = left_ranges.next();
@@ -163,7 +165,7 @@ impl CharacterSet {
     /// Get the set of characters that are present in both this set
     /// and the other set. Remove those common characters from both
     /// of the operands.
-    pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
+    pub fn remove_intersection(&mut self, other: &mut Self) -> Self {
         let mut intersection = Vec::new();
         let mut left_i = 0;
         let mut right_i = 0;
@@ -209,29 +211,28 @@ impl CharacterSet {
                     }
                 }
             }
-            Ordering::Equal => {
-                // [ L ]
-                // [ R ]
-                if left.end < right.end {
-                    intersection.push(left.start..left.end);
-                    right.start = left.end;
-                    self.ranges.remove(left_i);
-                }
-                // [ L ]
-                // [ R ]
-                else if left.end == right.end {
-                    intersection.push(left.clone());
-                    self.ranges.remove(left_i);
-                    other.ranges.remove(right_i);
-                }
-                // [ L ]
-                // [ R ]
-                else if left.end > right.end {
-                    intersection.push(right.clone());
-                    left.start = right.end;
-                    other.ranges.remove(right_i);
-                }
+            // [ L ]
+            // [ R ]
+            Ordering::Equal if left.end < right.end => {
+                intersection.push(left.start..left.end);
+                right.start = left.end;
+                self.ranges.remove(left_i);
+            }
+            // [ L ]
+            // [ R ]
+            Ordering::Equal if left.end == right.end => {
+                intersection.push(left.clone());
+                self.ranges.remove(left_i);
+                other.ranges.remove(right_i);
             }
+            // [ L ]
+            // [ R ]
+            Ordering::Equal if left.end > right.end => {
+                intersection.push(right.clone());
+                left.start = right.end;
+                other.ranges.remove(right_i);
+            }
+            Ordering::Equal => {}
             Ordering::Greater => {
                 // [ L ]
                 // [ R ]
@@ -271,31 +272,43 @@ impl CharacterSet {
                 }
             }
         }
-        CharacterSet {
+        Self {
             ranges: intersection,
         }
     }
 
     /// Produces a `CharacterSet` containing every character in `self` that is not present in
     /// `other`.
-    pub fn difference(mut self, mut other: CharacterSet) -> CharacterSet {
+    pub fn difference(mut self, mut other: Self) -> Self {
         self.remove_intersection(&mut other);
         self
     }
 
     /// Produces a `CharacterSet` containing every character that is in _exactly one_ of `self` or
     /// `other`, but is not present in both sets.
-    pub fn symmetric_difference(mut self, mut other: CharacterSet) -> CharacterSet {
+    pub fn symmetric_difference(mut self, mut other: Self) -> Self {
         self.remove_intersection(&mut other);
         self.add(&other)
     }
 
-    pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
-        self.ranges.iter().flat_map(|r| r.clone())
+    pub fn char_codes(&self) -> impl Iterator<Item = u32> + '_ {
+        self.ranges.iter().flat_map(Clone::clone)
+    }
+
+    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
+        self.char_codes().filter_map(char::from_u32)
     }
 
-    pub fn chars<'a>(&'a self) -> impl Iterator<Item = char> + 'a {
-        self.iter().filter_map(char::from_u32)
+    pub fn range_count(&self) -> usize {
+        self.ranges.len()
+    }
+
+    pub fn ranges(&self) -> impl Iterator<Item = RangeInclusive<char>> + '_ {
+        self.ranges.iter().filter_map(|range| {
+            let start = range.clone().find_map(char::from_u32)?;
+            let end = (range.start..range.end).rev().find_map(char::from_u32)?;
+            Some(start..=end)
+        })
     }
 
     pub fn is_empty(&self) -> bool {
@@ -304,53 +317,68 @@ impl CharacterSet {
 
     /// Get a reduced list of character ranges, assuming that a given
     /// set of characters can be safely ignored.
-    pub fn simplify_ignoring<'a>(
-        &'a self,
-        ruled_out_characters: &'a HashSet<u32>,
-    ) -> Vec<Range<char>> {
-        let mut prev_range: Option<Range<char>> = None;
-        self.chars()
-            .map(|c| (c, false))
-            .chain(Some(('\0', true)))
-            .filter_map(move |(c, done)| {
-                if done {
-                    return prev_range.clone();
-                }
-                if ruled_out_characters.contains(&(c as u32)) {
-                    return None;
-                }
-                if let Some(range) = prev_range.clone() {
-                    let mut prev_range_successor = range.end as u32 + 1;
-                    while prev_range_successor < c as u32 {
-                        if !ruled_out_characters.contains(&prev_range_successor) {
-                            prev_range = Some(c..c);
-                            return Some(range);
+    pub fn simplify_ignoring(&self, ruled_out_characters: &Self) -> Self {
+        let mut prev_range: Option<Range<u32>> = None;
+        Self {
+            ranges: self
+                .ranges
+                .iter()
+                .map(|range| Some(range.clone()))
+                .chain([None])
+                .filter_map(move |range| {
+                    if let Some(range) = &range {
+                        if ruled_out_characters.contains_codepoint_range(range.clone()) {
+                            return None;
+                        }
+
+                        if let Some(prev_range) = &mut prev_range {
+                            if ruled_out_characters
+                                .contains_codepoint_range(prev_range.end..range.start)
+                            {
+                                prev_range.end = range.end;
+                                return None;
+                            }
                         }
-                        prev_range_successor += 1;
                     }
-                    prev_range = Some(range.start..c);
-                    None
-                } else {
-                    prev_range = Some(c..c);
-                    None
-                }
-            })
-            .collect()
+
+                    let result = prev_range.clone();
+                    prev_range = range;
+                    result
+                })
+                .collect(),
+        }
+    }
+
+    pub fn contains_codepoint_range(&self, seek_range: Range<u32>) -> bool {
+        let ix = match self.ranges.binary_search_by(|probe| {
+            if probe.end <= seek_range.start {
+                Ordering::Less
+            } else if probe.start > seek_range.start {
+                Ordering::Greater
+            } else {
+                Ordering::Equal
+            }
+        }) {
+            Ok(ix) | Err(ix) => ix,
+        };
+        self.ranges.get(ix).map_or(false, |range| {
+            range.start <= seek_range.start && range.end >= seek_range.end
+        })
     }
 
     pub fn contains(&self, c: char) -> bool {
-        self.ranges.iter().any(|r| r.contains(&(c as u32)))
+        self.contains_codepoint_range(c as u32..c as u32 + 1)
     }
 }
 
 impl Ord for CharacterSet {
-    fn cmp(&self, other: &CharacterSet) -> Ordering {
+    fn cmp(&self, other: &Self) -> Ordering {
         let count_cmp = self
             .ranges
             .iter()
-            .map(|r| r.len())
+            .map(ExactSizeIterator::len)
             .sum::<usize>()
-            .cmp(&other.ranges.iter().map(|r| r.len()).sum());
+            .cmp(&other.ranges.iter().map(ExactSizeIterator::len).sum());
         if count_cmp != Ordering::Equal {
             return count_cmp;
         }
@@ -368,12 +396,12 @@ impl Ord for CharacterSet {
                 }
             }
         }
-        return Ordering::Equal;
+        Ordering::Equal
     }
 }
 
 impl PartialOrd for CharacterSet {
-    fn partial_cmp(&self, other: &CharacterSet) -> Option<Ordering> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }
@@ -386,11 +414,11 @@ impl fmt::Debug for CharacterSet {
             write!(f, "^ ")?;
             set = set.negate();
         }
-        for (i, c) in set.chars().enumerate() {
+        for (i, range) in set.ranges().enumerate() {
             if i > 0 {
                 write!(f, ", ")?;
             }
-            write!(f, "{:?}", c)?;
+            write!(f, "{range:?}")?;
         }
         write!(f, "]")?;
         Ok(())
@@ -398,8 +426,8 @@ impl fmt::Debug for CharacterSet {
 }
 
 impl Nfa {
-    pub fn new() -> Self {
-        Nfa { states: Vec::new() }
+    pub const fn new() -> Self {
+        Self { states: Vec::new() }
     }
 
     pub fn last_state_id(&self) -> u32 {
@@ -409,9 +437,9 @@ impl Nfa {
 
 impl fmt::Debug for Nfa {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "Nfa {{ states: {{\n")?;
+        writeln!(f, "Nfa {{ states: {{")?;
         for (i, state) in self.states.iter().enumerate() {
-            write!(f, "  {}: {:?},\n", i, state)?;
+            writeln!(f, "  {i}: {state:?},")?;
         }
         write!(f, "}} }}")?;
         Ok(())
@@ -434,7 +462,7 @@ impl<'a> NfaCursor<'a> {
     }
 
     pub fn force_reset(&mut self, states: Vec<u32>) {
-        self.state_ids = states
+        self.state_ids = states;
     }
 
     pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
@@ -464,7 +492,7 @@ impl<'a> NfaCursor<'a> {
     fn group_transitions<'b>(
         iter: impl Iterator<Item = (&'b CharacterSet, bool, i32, u32)>,
     ) -> Vec<NfaTransition> {
-        let mut result: Vec<NfaTransition> = Vec::new();
+        let mut result = Vec::<NfaTransition>::new();
         for (chars, is_sep, prec, state) in iter {
             let mut chars = chars.clone();
             let mut i = 0;
@@ -472,9 +500,8 @@ impl<'a> NfaCursor<'a> {
                 let intersection = result[i].characters.remove_intersection(&mut chars);
                 if !intersection.is_empty() {
                     let mut intersection_states = result[i].states.clone();
-                    match intersection_states.binary_search(&state) {
-                        Err(j) => intersection_states.insert(j, state),
-                        _ => {}
+                    if let Err(j) = intersection_states.binary_search(&state) {
+                        intersection_states.insert(j, state);
                     }
                     let intersection_transition = NfaTransition {
                         characters: intersection,
@@ -500,25 +527,25 @@ impl<'a> NfaCursor<'a> {
                 });
             }
         }
-        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
 
         let mut i = 0;
-        'i_loop: while i < result.len() {
+        while i < result.len() {
             for j in 0..i {
                 if result[j].states == result[i].states
                     && result[j].is_separator == result[i].is_separator
                     && result[j].precedence == result[i].precedence
                 {
-                    let mut characters = CharacterSet::empty();
-                    swap(&mut characters, &mut result[j].characters);
+                    let characters = mem::take(&mut result[j].characters);
                     result[j].characters = characters.add(&result[i].characters);
                     result.remove(i);
-                    continue 'i_loop;
+                    i -= 1;
+                    break;
                 }
             }
             i += 1;
         }
+        result.sort_unstable_by(|a, b| a.characters.cmp(&b.characters));
 
         result
     }
@@ -801,15 +828,15 @@ mod tests {
             ],
             vec![
                 NfaTransition {
-                    characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
+                    characters: CharacterSet::empty().add_char('b').add_char('e'),
                     precedence: 0,
-                    states: vec![1],
+                    states: vec![2],
                     is_separator: false,
                 },
                 NfaTransition {
-                    characters: CharacterSet::empty().add_char('b').add_char('e'),
+                    characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
                     precedence: 0,
-                    states: vec![2],
+                    states: vec![1],
                     is_separator: false,
                 },
             ],
@@ -824,8 +851,7 @@ mod tests {
                     .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state))
                 ),
                 row.1,
-                "row {}",
-                i
+                "row {i}",
             );
         }
     }
@@ -966,15 +992,14 @@ mod tests {
                 row.right
             );
 
-            let symm_difference = row.left_only.clone().add(&mut row.right_only.clone());
+            let symm_difference = row.left_only.clone().add(&row.right_only);
             assert_eq!(
                 row.left.clone().symmetric_difference(row.right.clone()),
                 symm_difference,
-                "row {}b: {:?} ~~ {:?}",
-                i,
+                "row {i}b: {:?} ~~ {:?}",
                 row.left,
                 row.right
-            )
+            );
         }
     }
@@ -1035,7 +1060,8 @@ mod tests {
     }
 
     #[test]
-    fn test_character_set_get_ranges() {
+    #[allow(clippy::single_range_in_vec_init)]
+    fn test_character_set_simplify_ignoring() {
         struct Row {
             chars: Vec<char>,
             ruled_out_chars: Vec<char>,
             expected_ranges: Vec<Range<char>>,
@@ -1058,24 +1084,46 @@ mod tests {
                 ruled_out_chars: vec!['d', 'f', 'g'],
                 expected_ranges: vec!['a'..'h', 'z'..'z'],
             },
+            Row {
+                chars: vec!['a', 'b', 'c', 'g', 'h', 'i'],
+                ruled_out_chars: vec!['d', 'j'],
+                expected_ranges: vec!['a'..'c', 'g'..'i'],
+            },
+            Row {
+                chars: vec!['c', 'd', 'e', 'g', 'h'],
+                ruled_out_chars: vec!['a', 'b', 'c', 'd', 'e', 'f'],
+                expected_ranges: vec!['g'..'h'],
+            },
+            Row {
+                chars: vec!['I', 'N'],
+                ruled_out_chars: vec!['A', 'I', 'N', 'Z'],
+                expected_ranges: vec![],
+            },
         ];
 
         for Row {
             chars,
            ruled_out_chars,
            expected_ranges,
-        } in table.iter()
+        } in &table
        {
            let ruled_out_chars = ruled_out_chars
-                .into_iter()
-                .map(|c: &char| *c as u32)
-                .collect();
+                .iter()
+                .fold(CharacterSet::empty(), |set, c| set.add_char(*c));
            let mut set = CharacterSet::empty();
            for c in chars {
                set = set.add_char(*c);
            }
-            let ranges = set.simplify_ignoring(&ruled_out_chars);
-            assert_eq!(ranges, *expected_ranges);
+            let actual = set.simplify_ignoring(&ruled_out_chars);
+            let expected = expected_ranges
+                .iter()
+                .fold(CharacterSet::empty(), |set, range| {
+                    set.add_range(range.start, range.end)
+                });
+            assert_eq!(
+                actual, expected,
+                "chars: {chars:?}, ruled out chars: {ruled_out_chars:?}"
+            );
        }
    }
}
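For context: `CharacterSet` now stores sorted, half-open u32 code-point ranges, and membership checks go through the binary-search helper `contains_codepoint_range`. A test-style sketch of the semantics, assuming crate-internal access to the `nfa` module above:

    // 'a'..='c' is stored as the half-open code-point range 97..100.
    let set = CharacterSet::empty().add_range('a', 'c');
    assert!(set.contains('b'));
    // The entire queried range must be covered by one stored range.
    assert!(set.contains_codepoint_range('a' as u32..'c' as u32 + 1));
    assert!(!set.contains_codepoint_range('a' as u32..'z' as u32 + 1));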
diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs
index 4391898..25353e8 100644
--- a/cli/src/generate/node_types.rs
+++ b/cli/src/generate/node_types.rs
@@ -1,24 +1,30 @@
-use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
-use super::rules::{Alias, AliasMap, Symbol, SymbolType};
+use std::{
+    cmp::Ordering,
+    collections::{BTreeMap, HashMap, HashSet},
+};
+
 use anyhow::{anyhow, Result};
 use serde::Serialize;
-use std::cmp::Ordering;
-use std::collections::{BTreeMap, HashMap, HashSet};
+
+use super::{
+    grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
+    rules::{Alias, AliasMap, Symbol, SymbolType},
+};
 
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub(crate) enum ChildType {
+pub enum ChildType {
     Normal(Symbol),
     Aliased(Alias),
 }
 
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub(crate) struct FieldInfo {
+pub struct FieldInfo {
     pub quantity: ChildQuantity,
     pub types: Vec<ChildType>,
 }
 
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub(crate) struct VariableInfo {
+pub struct VariableInfo {
     pub fields: HashMap<String, FieldInfo>,
     pub children: FieldInfo,
     pub children_without_fields: FieldInfo,
@@ -26,7 +32,7 @@
 }
 
 #[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)]
-pub(crate) struct NodeInfoJSON {
+pub struct NodeInfoJSON {
     #[serde(rename = "type")]
     kind: String,
     named: bool,
@@ -39,14 +45,14 @@
 }
 
 #[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub(crate) struct NodeTypeJSON {
+pub struct NodeTypeJSON {
     #[serde(rename = "type")]
     kind: String,
     named: bool,
 }
 
 #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct FieldInfoJSON {
+pub struct FieldInfoJSON {
     multiple: bool,
     required: bool,
     types: Vec<NodeTypeJSON>,
@@ -61,7 +67,7 @@ pub struct ChildQuantity {
 
 impl Default for FieldInfoJSON {
     fn default() -> Self {
-        FieldInfoJSON {
+        Self {
             multiple: false,
             required: true,
             types: Vec::new(),
@@ -76,23 +82,25 @@ impl Default for ChildQuantity {
 }
 
 impl ChildQuantity {
-    fn zero() -> Self {
-        ChildQuantity {
+    #[must_use]
+    const fn zero() -> Self {
+        Self {
             exists: false,
             required: false,
             multiple: false,
         }
     }
 
-    fn one() -> Self {
-        ChildQuantity {
+    #[must_use]
+    const fn one() -> Self {
+        Self {
             exists: true,
             required: true,
             multiple: false,
         }
     }
 
-    fn append(&mut self, other: ChildQuantity) {
+    fn append(&mut self, other: Self) {
         if other.exists {
             if self.exists || other.multiple {
                 self.multiple = true;
@@ -104,7 +112,7 @@ impl ChildQuantity {
         }
     }
 
-    fn union(&mut self, other: ChildQuantity) -> bool {
+    fn union(&mut self, other: Self) -> bool {
         let mut result = false;
         if !self.exists && other.exists {
             result = true;
@@ -132,19 +140,18 @@ impl ChildQuantity {
 ///    * `types` - The types of visible children the field can contain.
 ///    * `optional` - Do `N` nodes always have this field?
 ///    * `multiple` - Can `N` nodes have multiple children for this field?
-/// 3. 
`children_without_fields` - The *other* named children of `N` that are -/// not associated with fields. Data regarding these children: +/// 3. `children_without_fields` - The *other* named children of `N` that are not associated with +/// fields. Data regarding these children: /// * `types` - The types of named children with no field. /// * `optional` - Do `N` nodes always have at least one named child with no field? /// * `multiple` - Can `N` nodes have multiple named children with no field? /// /// Each summary must account for some indirect factors: -/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible -/// children of `C` *appear* to be direct children of `N`. -/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, -/// then nodes which *appear* to have type `N` may have internal structure based -/// on `M`. -pub(crate) fn get_variable_info( +/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible children of `C` +/// *appear* to be direct children of `N`. +/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, then nodes which +/// *appear* to have type `N` may have internal structure based on `M`. +pub fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, default_aliases: &AliasMap, @@ -209,14 +216,15 @@ pub(crate) fn get_variable_info( let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert(FieldInfo::default()); + .or_insert_with(FieldInfo::default); did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); let production_field_quantity = production_field_quantities .entry(field_name) - .or_insert(ChildQuantity::zero()); + .or_insert_with(ChildQuantity::zero); - // Inherit the types and quantities of hidden children associated with fields. + // Inherit the types and quantities of hidden children associated with + // fields. 
                     if child_is_hidden && child_symbol.is_non_terminal() {
                         let child_variable_info = &result[child_symbol.index];
                         did_change |= extend_sorted(
@@ -252,13 +260,13 @@ pub(crate) fn get_variable_info(
                         for (field_name, child_field_info) in &child_variable_info.fields {
                             production_field_quantities
                                 .entry(field_name)
-                                .or_insert(ChildQuantity::zero())
+                                .or_insert_with(ChildQuantity::zero)
                                 .append(child_field_info.quantity);
                             did_change |= extend_sorted(
                                 &mut variable_info
                                     .fields
                                     .entry(field_name.clone())
-                                    .or_insert(FieldInfo::default())
+                                    .or_insert_with(FieldInfo::default)
                                     .types,
                                 &child_field_info.types,
                             );
@@ -308,12 +316,12 @@ pub(crate) fn get_variable_info(
                 .quantity
                 .union(production_children_without_fields_quantity);
 
-            for (field_name, info) in variable_info.fields.iter_mut() {
+            for (field_name, info) in &mut variable_info.fields {
                 did_change |= info.quantity.union(
                     production_field_quantities
                         .get(field_name)
-                        .cloned()
-                        .unwrap_or(ChildQuantity::zero()),
+                        .copied()
+                        .unwrap_or_else(ChildQuantity::zero),
                 );
             }
         }
@@ -345,8 +353,8 @@ pub(crate) fn get_variable_info(
             .types
             .retain(child_type_is_visible);
     }
-    for variable_info in result.iter_mut() {
-        for (_, field_info) in variable_info.fields.iter_mut() {
+    for variable_info in &mut result {
+        for field_info in variable_info.fields.values_mut() {
             field_info.types.retain(child_type_is_visible);
         }
         variable_info.fields.retain(|_, v| !v.types.is_empty());
@@ -359,11 +367,11 @@ pub(crate) fn get_variable_info(
     Ok(result)
 }
 
-pub(crate) fn generate_node_types_json(
+pub fn generate_node_types_json(
     syntax_grammar: &SyntaxGrammar,
     lexical_grammar: &LexicalGrammar,
     default_aliases: &AliasMap,
-    variable_info: &Vec<VariableInfo>,
+    variable_info: &[VariableInfo],
 ) -> Vec<NodeInfoJSON> {
     let mut node_types_json = BTreeMap::new();
 
@@ -373,7 +381,7 @@ pub(crate) fn generate_node_types_json(
             named: alias.is_named,
         },
         ChildType::Normal(symbol) => {
-            if let Some(alias) = default_aliases.get(&symbol) {
+            if let Some(alias) = default_aliases.get(symbol) {
                 NodeTypeJSON {
                     kind: alias.value.clone(),
                     named: alias.is_named,
@@ -408,15 +416,15 @@ pub(crate) fn generate_node_types_json(
     };
 
     let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| {
-        if info.types.len() > 0 {
+        if info.types.is_empty() {
+            json.required = false;
+        } else {
             json.multiple |= info.quantity.multiple;
             json.required &= info.quantity.required;
             json.types
                 .extend(info.types.iter().map(child_type_to_node_type));
             json.types.sort_unstable();
             json.types.dedup();
-        } else {
-            json.required = false;
         }
     };
 
@@ -432,7 +440,7 @@ pub(crate) fn generate_node_types_json(
         if !default_aliases.contains_key(extra_symbol) {
             aliases_by_symbol
                 .entry(*extra_symbol)
-                .or_insert(HashSet::new())
+                .or_insert_with(HashSet::new)
                 .insert(None);
         }
     }
@@ -441,7 +449,7 @@ pub(crate) fn generate_node_types_json(
             for step in &production.steps {
                 aliases_by_symbol
                     .entry(step.symbol)
-                    .or_insert(HashSet::new())
+                    .or_insert_with(HashSet::new)
                     .insert(
                         step.alias
                             .as_ref()
@@ -451,7 +459,10 @@ pub(crate) fn generate_node_types_json(
             }
         }
     }
-    aliases_by_symbol.insert(Symbol::non_terminal(0), [None].iter().cloned().collect());
+    aliases_by_symbol.insert(
+        Symbol::non_terminal(0),
+        std::iter::once(&None).cloned().collect(),
+    );
 
     let mut subtype_map = Vec::new();
     for (i, info) in variable_info.iter().enumerate() {
@@ -516,10 +527,10 @@ pub(crate) fn generate_node_types_json(
             });
 
             let fields_json = node_type_json.fields.as_mut().unwrap();
-            for (new_field, field_info) in info.fields.iter() {
+            for (new_field, field_info) in
 &info.fields {
                 let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| {
-                    // If another rule is aliased with the same name, and does *not* have this field,
-                    // then this field cannot be required.
+                    // If another rule is aliased with the same name, and does *not* have this
+                    // field, then this field cannot be required.
                     let mut field_json = FieldInfoJSON::default();
                     if node_type_existed {
                         field_json.required = false;
@@ -529,8 +540,8 @@ pub(crate) fn generate_node_types_json(
                 populate_field_info_json(field_json, field_info);
             }
 
-            // If another rule is aliased with the same name, any fields that aren't present in this
-            // cannot be required.
+            // If another rule is aliased with the same name, any fields that aren't present in
+            // this cannot be required.
             for (existing_field, field_json) in fields_json.iter_mut() {
                 if !info.fields.contains_key(existing_field) {
                     field_json.required = false;
@@ -558,7 +569,7 @@ pub(crate) fn generate_node_types_json(
         }
     });
 
-    for (_, node_type_json) in node_types_json.iter_mut() {
+    for node_type_json in node_types_json.values_mut() {
         if node_type_json
             .children
             .as_ref()
@@ -571,7 +582,7 @@ pub(crate) fn generate_node_types_json(
             process_supertypes(children, &subtype_map);
         }
         if let Some(fields) = &mut node_type_json.fields {
-            for (_, field_info) in fields.iter_mut() {
+            for field_info in fields.values_mut() {
                 process_supertypes(field_info, &subtype_map);
             }
         }
@@ -590,11 +601,11 @@ pub(crate) fn generate_node_types_json(
                 .unwrap_or(&empty)
                 .iter()
                 .map(move |alias| {
-                    if let Some(alias) = alias {
-                        (&alias.value, alias.kind())
-                    } else {
-                        (&variable.name, variable.kind)
-                    }
+                    alias
+                        .as_ref()
+                        .map_or((&variable.name, variable.kind), |alias| {
+                            (&alias.value, alias.kind())
+                        })
                 })
         });
     let external_tokens =
@@ -608,11 +619,9 @@ pub(crate) fn generate_node_types_json(
                 .unwrap_or(&empty)
                 .iter()
                 .map(move |alias| {
-                    if let Some(alias) = alias {
+                    alias.as_ref().map_or((&token.name, token.kind), |alias| {
                         (&alias.value, alias.kind())
-                    } else {
-                        (&token.name, token.kind)
-                    }
+                    })
                 })
         });
 
@@ -630,7 +639,7 @@ pub(crate) fn generate_node_types_json(
                     children.required = false;
                 }
                 if let Some(fields) = &mut node_type_json.fields {
-                    for (_, field) in fields.iter_mut() {
+                    for field in fields.values_mut() {
                         field.required = false;
                     }
                 }
@@ -647,7 +656,7 @@ pub(crate) fn generate_node_types_json(
     }
 
     let mut result = node_types_json.into_iter().map(|e| e.1).collect::<Vec<_>>();
-    result.extend(anonymous_node_types.into_iter());
+    result.extend(anonymous_node_types);
     result.sort_unstable_by(|a, b| {
         b.subtypes
             .is_some()
@@ -663,10 +672,7 @@ pub(crate) fn generate_node_types_json(
     result
 }
 
-fn process_supertypes(
-    info: &mut FieldInfoJSON,
-    subtype_map: &Vec<(NodeTypeJSON, Vec<NodeTypeJSON>)>,
-) {
+fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
     for (supertype, subtypes) in subtype_map {
         if info.types.contains(supertype) {
             info.types.retain(|t| !subtypes.contains(t));
@@ -682,9 +688,9 @@ fn variable_type_for_child_type(
     match child_type {
         ChildType::Aliased(alias) => alias.kind(),
         ChildType::Normal(symbol) => {
-            if syntax_grammar.supertype_symbols.contains(&symbol) {
+            if syntax_grammar.supertype_symbols.contains(symbol) {
                 VariableType::Named
-            } else if syntax_grammar.variables_to_inline.contains(&symbol) {
+            } else if syntax_grammar.variables_to_inline.contains(symbol) {
                 VariableType::Hidden
             } else {
                 match symbol.kind {
@@ -700,11 +706,10 @@ fn variable_type_for_child_type(
 
 fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
 where
-    T: Clone + Eq + Ord,
-    T: 'a,
+    T: 'a + Clone + Eq + Ord,
 {
     values.into_iter().any(|value| {
-        if let Err(i) = vec.binary_search(&value) {
+        if let Err(i) = vec.binary_search(value) {
             vec.insert(i, value.clone());
             true
         } else {
@@ -716,15 +721,17 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::generate::grammars::{
-        InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
+    use crate::generate::{
+        grammars::{
+            InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
+        },
+        prepare_grammar::prepare_grammar,
+        rules::Rule,
     };
-    use crate::generate::prepare_grammar::prepare_grammar;
-    use crate::generate::rules::Rule;
 
     #[test]
     fn test_node_types_simple() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "v1".to_string(),
@@ -813,7 +820,7 @@ mod tests {
 
     #[test]
     fn test_node_types_simple_extras() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             extra_symbols: vec![Rule::named("v3")],
             variables: vec![
                 Variable {
@@ -914,7 +921,7 @@ mod tests {
 
     #[test]
     fn test_node_types_with_supertypes() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             supertype_symbols: vec!["_v2".to_string()],
             variables: vec![
                 Variable {
@@ -996,7 +1003,7 @@ mod tests {
 
     #[test]
     fn test_node_types_for_children_without_fields() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "v1".to_string(),
@@ -1088,7 +1095,7 @@ mod tests {
 
     #[test]
     fn test_node_types_with_inlined_rules() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables_to_inline: vec!["v2".to_string()],
             variables: vec![
                 Variable {
@@ -1138,7 +1145,7 @@ mod tests {
 
     #[test]
     fn test_node_types_for_aliased_nodes() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "thing".to_string(),
@@ -1172,12 +1179,12 @@ mod tests {
                 Variable {
                     name: "identifier".to_string(),
                     kind: VariableType::Named,
-                    rule: Rule::pattern("\\w+"),
+                    rule: Rule::pattern("\\w+", ""),
                 },
                 Variable {
                     name: "foo_identifier".to_string(),
                     kind: VariableType::Named,
-                    rule: Rule::pattern("[\\w-]+"),
+                    rule: Rule::pattern("[\\w-]+", ""),
                 },
             ],
             ..Default::default()
@@ -1208,7 +1215,7 @@ mod tests {
 
     #[test]
     fn test_node_types_with_multiple_valued_fields() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "a".to_string(),
@@ -1270,13 +1277,13 @@ mod tests {
 
     #[test]
     fn test_node_types_with_fields_on_hidden_tokens() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![Variable {
                 name: "script".to_string(),
                 kind: VariableType::Named,
                 rule: Rule::seq(vec![
-                    Rule::field("a".to_string(), Rule::pattern("hi")),
-                    Rule::field("b".to_string(), Rule::pattern("bye")),
+                    Rule::field("a".to_string(), Rule::pattern("hi", "")),
+                    Rule::field("b".to_string(), Rule::pattern("bye", "")),
                 ]),
             }],
             ..Default::default()
@@ -1296,7 +1303,7 @@ mod tests {
 
     #[test]
     fn test_node_types_with_multiple_rules_same_alias_name() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "script".to_string(),
@@ -1416,7 +1423,7 @@ mod tests {
 
     #[test]
     fn test_node_types_with_tokens_aliased_to_match_rules() {
-        let node_types = get_node_types(InputGrammar {
+        let node_types = get_node_types(&InputGrammar {
             variables: vec![
                 Variable {
                     name: "a".to_string(),
@@ -1766,9 +1773,9 @@ mod tests {
         );
     }
 
-    fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
+    fn get_node_types(grammar: &InputGrammar) -> Vec<NodeInfoJSON> {
         let (syntax_grammar, lexical_grammar, _, default_aliases) =
-            prepare_grammar(&grammar).unwrap();
+            prepare_grammar(grammar).unwrap();
         let variable_info =
             get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
         generate_node_types_json(
@@ -1783,17 +1790,18 @@ mod tests {
         variables: Vec<SyntaxVariable>,
         supertype_symbols: Vec<Symbol>,
     ) -> SyntaxGrammar {
-        let mut syntax_grammar = SyntaxGrammar::default();
-        syntax_grammar.variables = variables;
-        syntax_grammar.supertype_symbols = supertype_symbols;
-        syntax_grammar
+        SyntaxGrammar {
+            variables,
+            supertype_symbols,
+            ..SyntaxGrammar::default()
+        }
     }
 
     fn build_lexical_grammar() -> LexicalGrammar {
         let mut lexical_grammar = LexicalGrammar::default();
         for i in 0..10 {
             lexical_grammar.variables.push(LexicalVariable {
-                name: format!("token_{}", i),
+                name: format!("token_{i}"),
                 kind: VariableType::Named,
                 implicit_precedence: 0,
                 start_state: 0,
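For context: the node-types summaries above are merged across alternative productions with `ChildQuantity::union`, which only ever widens a summary (a child required in one production but absent in another becomes optional). A standalone mirror of that rule, with simplified, hypothetical names:

    #[derive(Clone, Copy, Debug, PartialEq)]
    struct Quantity {
        exists: bool,
        required: bool,
        multiple: bool,
    }

    // Returns true when the merged summary had to change.
    fn union(a: &mut Quantity, b: Quantity) -> bool {
        let mut changed = false;
        if !a.exists && b.exists { a.exists = true; changed = true; }
        if a.required && !b.required { a.required = false; changed = true; }
        if !a.multiple && b.multiple { a.multiple = true; changed = true; }
        changed
    }

    fn main() {
        let mut q = Quantity { exists: true, required: true, multiple: false };
        // Merging with a production where the child is absent: it becomes optional.
        assert!(union(&mut q, Quantity { exists: false, required: false, multiple: false }));
        assert!(!q.required);
    }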
diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs
index 7fda0b7..e801d53 100644
--- a/cli/src/generate/parse_grammar.rs
+++ b/cli/src/generate/parse_grammar.rs
@@ -1,12 +1,16 @@
-use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType};
-use super::rules::{Precedence, Rule};
 use anyhow::{anyhow, Result};
 use serde::Deserialize;
 use serde_json::{Map, Value};
 
+use super::{
+    grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
+    rules::{Precedence, Rule},
+};
+
 #[derive(Deserialize)]
 #[serde(tag = "type")]
 #[allow(non_camel_case_types)]
+#[allow(clippy::upper_case_acronyms)]
 enum RuleJSON {
     ALIAS {
         content: Box<RuleJSON>,
         named: bool,
         value: String,
     },
@@ -19,6 +23,7 @@ enum RuleJSON {
     },
     PATTERN {
         value: String,
+        flags: Option<String>,
     },
     SYMBOL {
         name: String,
     },
@@ -90,15 +95,15 @@ pub(crate) struct GrammarJSON {
 }
 
 pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
-    let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
+    let grammar_json = serde_json::from_str::<GrammarJSON>(input)?;
 
     let mut variables = Vec::with_capacity(grammar_json.rules.len());
     for (name, value) in grammar_json.rules {
         variables.push(Variable {
-            name: name.to_owned(),
+            name: name.clone(),
             kind: VariableType::Named,
             rule: parse_rule(serde_json::from_value(value)?),
-        })
+        });
     }
 
     let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
@@ -113,12 +118,27 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
                     "Invalid rule in precedences array. Only strings and symbols are allowed"
                ))
            }
-        })
+        });
        }
        precedence_orderings.push(ordering);
    }
 
-    let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect();
+    let extra_symbols = grammar_json
+        .extras
+        .into_iter()
+        .try_fold(Vec::new(), |mut acc, item| {
+            let rule = parse_rule(item);
+            if let Rule::String(ref value) = rule {
+                if value.is_empty() {
+                    return Err(anyhow!(
+                        "Rules in the `extras` array must not contain empty strings"
+                    ));
+                }
+            }
+            acc.push(rule);
+            Ok(acc)
+        })?;
+
     let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect();
 
     Ok(InputGrammar {
@@ -143,7 +163,23 @@ fn parse_rule(json: RuleJSON) -> Rule {
         } => Rule::alias(parse_rule(*content), value, named),
         RuleJSON::BLANK => Rule::Blank,
         RuleJSON::STRING { value } => Rule::String(value),
-        RuleJSON::PATTERN { value } => Rule::Pattern(value),
+        RuleJSON::PATTERN { value, flags } => Rule::Pattern(
+            value,
+            flags.map_or(String::new(), |f| {
+                f.matches(|c| {
+                    if c == 'i' {
+                        true
+                    } else {
+                        // silently ignore unicode flags
+                        if c != 'u' && c != 'v' {
+                            eprintln!("Warning: unsupported flag {c}");
+                        }
+                        false
+                    }
+                })
+                .collect()
+            }),
+        ),
         RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
         RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
         RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
@@ -167,11 +203,11 @@ fn parse_rule(json: RuleJSON) -> Rule {
     }
 }
 
-impl Into<Precedence> for PrecedenceValueJSON {
-    fn into(self) -> Precedence {
-        match self {
-            PrecedenceValueJSON::Integer(i) => Precedence::Integer(i),
-            PrecedenceValueJSON::Name(i) => Precedence::Name(i),
+impl From<PrecedenceValueJSON> for Precedence {
+    fn from(val: PrecedenceValueJSON) -> Self {
+        match val {
+            PrecedenceValueJSON::Integer(i) => Self::Integer(i),
+            PrecedenceValueJSON::Name(i) => Self::Name(i),
         }
     }
 }
diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs
index 1979691..4b97e53 100644
--- a/cli/src/generate/prepare_grammar/expand_repeats.rs
+++ b/cli/src/generate/prepare_grammar/expand_repeats.rs
@@ -1,8 +1,10 @@
+use std::{collections::HashMap, mem};
+
 use super::ExtractedSyntaxGrammar;
-use crate::generate::grammars::{Variable, VariableType};
-use crate::generate::rules::{Rule, Symbol};
-use std::collections::HashMap;
-use std::mem;
+use crate::generate::{
+    grammars::{Variable, VariableType},
+    rules::{Rule, Symbol},
+};
 
 struct Expander {
     variable_name: String,
@@ -24,7 +26,7 @@ impl Expander {
         // convert that rule itself into a binary tree structure instead of introducing
         // another auxiliary rule.
         if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) {
-            let inner_rule = self.expand_rule(&repeated_content);
+            let inner_rule = self.expand_rule(repeated_content);
             variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule);
             variable.kind = VariableType::Auxiliary;
             return true;
@@ -57,7 +59,7 @@ impl Expander {
                 params: params.clone(),
             },
 
-            // For repetitions, introduce an auxiliary rule that contains the the
+            // For repetitions, introduce an auxiliary rule that contains the
             // repeated content, but can also contain a recursive binary tree structure.
             Rule::Repeat(content) => {
                 let inner_rule = self.expand_rule(content);
@@ -107,8 +109,8 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy
         existing_repeats: HashMap::new(),
     };
 
-    for (i, mut variable) in grammar.variables.iter_mut().enumerate() {
-        let expanded_top_level_repetition = expander.expand_variable(i, &mut variable);
+    for (i, variable) in grammar.variables.iter_mut().enumerate() {
+        let expanded_top_level_repetition = expander.expand_variable(i, variable);
 
         // If a hidden variable had a top-level repetition and it was converted to
        // a recursive rule, then it can't be inlined.
@@ -119,9 +121,7 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy
        }
    }
 
-    grammar
-        .variables
-        .extend(expander.auxiliary_variables.into_iter());
+    grammar.variables.extend(expander.auxiliary_variables);
 
     grammar
 }
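For context before the next file: the `flags` field added to PATTERN rules above keeps only the `i` (case-insensitive) flag, silently drops the JavaScript-specific `u`/`v` flags, and warns about anything else. A standalone mirror of that filtering, under the assumption that the closure in `parse_rule` behaves as written:

    fn filter_regex_flags(flags: &str) -> String {
        flags
            .matches(|c| {
                if c == 'i' {
                    true
                } else {
                    if c != 'u' && c != 'v' {
                        eprintln!("Warning: unsupported flag {c}");
                    }
                    false
                }
            })
            .collect()
    }

    fn main() {
        assert_eq!(filter_regex_flags("iu"), "i"); // `u` dropped without a warning
        assert_eq!(filter_regex_flags("g"), "");   // prints: Warning: unsupported flag g
    }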
diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs
index d6c73d9..a11f9ad 100644
--- a/cli/src/generate/prepare_grammar/expand_tokens.rs
+++ b/cli/src/generate/prepare_grammar/expand_tokens.rs
@@ -1,20 +1,20 @@
-use super::ExtractedLexicalGrammar;
-use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
-use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
-use crate::generate::rules::{Precedence, Rule};
+use std::collections::HashMap;
+
 use anyhow::{anyhow, Context, Result};
 use lazy_static::lazy_static;
-use regex::Regex;
 use regex_syntax::ast::{
-    parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
-    ClassUnicodeKind, RepetitionKind, RepetitionRange,
+    parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
+    RepetitionKind, RepetitionRange,
+};
+
+use super::ExtractedLexicalGrammar;
+use crate::generate::{
+    grammars::{LexicalGrammar, LexicalVariable},
+    nfa::{CharacterSet, Nfa, NfaState},
+    rules::{Precedence, Rule},
 };
-use std::collections::HashMap;
-use std::i32;
 
 lazy_static! {
-    static ref CURLY_BRACE_REGEX: Regex =
-        Regex::new(r#"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
     static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
         serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
     static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
@@ -25,11 +25,10 @@ lazy_static! {
         serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
 }
 
-const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
-const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
-const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
-const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
-const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
+const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
+const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
+const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
+const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
 
 struct NfaBuilder {
     nfa: Nfa,
@@ -51,7 +50,7 @@ fn get_implicit_precedence(rule: &Rule) -> i32 {
     }
 }
 
-fn get_completion_precedence(rule: &Rule) -> i32 {
+const fn get_completion_precedence(rule: &Rule) -> i32 {
     if let Rule::Metadata { params, .. } = rule {
         if let Precedence::Integer(p) = params.precedence {
             return p;
@@ -60,43 +59,18 @@ fn get_completion_precedence(rule: &Rule) -> i32 {
     0
 }
 
-fn preprocess_regex(content: &str) -> String {
-    let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}");
-    let mut result = String::with_capacity(content.len());
-    let mut is_escaped = false;
-    for c in content.chars() {
-        if is_escaped {
-            if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
-                result.push(c);
-            } else {
-                result.push('\\');
-                result.push(c);
-            }
-            is_escaped = false;
-        } else if c == '\\' {
-            is_escaped = true;
-        } else {
-            result.push(c);
-        }
-    }
-    if is_escaped {
-        result.push('\\');
-    }
-    result
-}
-
-pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
+pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
     let mut builder = NfaBuilder {
         nfa: Nfa::new(),
         is_sep: true,
         precedence_stack: vec![0],
     };
 
-    let separator_rule = if grammar.separators.len() > 0 {
+    let separator_rule = if grammar.separators.is_empty() {
+        Rule::Blank
+    } else {
         grammar.separators.push(Rule::Blank);
         Rule::repeat(Rule::choice(grammar.separators))
-    } else {
-        Rule::Blank
     };
 
     let mut variables = Vec::new();
@@ -139,17 +113,16 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<Lexi
     fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
         match rule {
-            Rule::Pattern(s) => {
-                let s = preprocess_regex(s);
-                let ast = parse::Parser::new().parse(&s)?;
-                self.expand_regex(&ast, next_state_id)
+            Rule::Pattern(s, f) => {
+                let ast = parse::Parser::new().parse(s)?;
+                self.expand_regex(&ast, next_state_id, f.contains('i'))
             }
             Rule::String(s) => {
                 for c in s.chars().rev() {
                     self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
                     next_state_id = self.nfa.last_state_id();
                 }
-                Ok(s.len() > 0)
+                Ok(!s.is_empty())
             }
             Rule::Choice(elements) => {
                 let mut alternative_state_ids = Vec::new();
@@ -170,7 +143,7 @@ impl NfaBuilder {
             }
             Rule::Seq(elements) => {
                 let mut result = false;
-                for element in elements.into_iter().rev() {
+                for element in elements.iter().rev() {
                     if self.expand_rule(element, next_state_id)? {
                         result = true;
                     }
@@ -206,16 +179,46 @@ impl NfaBuilder {
             result
         }
             Rule::Blank => Ok(false),
-            _ => Err(anyhow!("Grammar error: Unexpected rule {:?}", rule)),
+            _ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
         }
     }
 
-    fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
+    fn expand_regex(
+        &mut self,
+        ast: &Ast,
+        mut next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        const fn inverse_char(c: char) -> char {
+            match c {
+                'a'..='z' => (c as u8 - b'a' + b'A') as char,
+                'A'..='Z' => (c as u8 - b'A' + b'a') as char,
+                c => c,
+            }
+        }
+
+        fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
+            for char in chars.clone().chars() {
+                let inverted = inverse_char(char);
+                if char != inverted {
+                    chars = chars.add_char(inverted);
+                }
+            }
+            chars
+        }
+
         match ast {
             Ast::Empty(_) => Ok(false),
             Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
             Ast::Literal(literal) => {
-                self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
+                let mut char_set = CharacterSet::from_char(literal.c);
+                if case_insensitive {
+                    let inverted = inverse_char(literal.c);
+                    if literal.c != inverted {
+                        char_set = char_set.add_char(inverted);
+                    }
+                }
+                self.push_advance(char_set, next_state_id);
                 Ok(true)
             }
             Ast::Dot(_) => {
@@ -223,70 +226,82 @@ impl NfaBuilder {
                 Ok(true)
             }
             Ast::Assertion(_) => Err(anyhow!("Regex error: Assertions are not supported")),
-            Ast::Class(class) => match class {
-                Class::Unicode(class) => {
-                    let mut chars = self.expand_unicode_character_class(&class.kind)?;
-                    if class.negated {
-                        chars = chars.negate();
-                    }
-                    self.push_advance(chars, next_state_id);
-                    Ok(true)
+            Ast::ClassUnicode(class) => {
+                let mut chars = self.expand_unicode_character_class(&class.kind)?;
+                if class.negated {
+                    chars = chars.negate();
                 }
-                Class::Perl(class) => {
-                    let mut chars = self.expand_perl_character_class(&class.kind);
-                    if class.negated {
-                        chars = chars.negate();
-                    }
-                    self.push_advance(chars, next_state_id);
-                    Ok(true)
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
                 }
-                Class::Bracketed(class) => {
-                    let mut chars = self.translate_class_set(&class.kind)?;
-                    if class.negated {
-                        chars = chars.negate();
-                    }
-                    self.push_advance(chars, next_state_id);
-                    Ok(true)
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
+            Ast::ClassPerl(class) => {
+                let mut chars = self.expand_perl_character_class(&class.kind);
+                if class.negated {
+                    chars = chars.negate();
                 }
-            },
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
+                }
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
+            Ast::ClassBracketed(class) => {
+                let mut chars = self.translate_class_set(&class.kind)?;
+                if class.negated {
+                    chars = chars.negate();
+                }
+                if case_insensitive {
+                    chars = with_inverse_char(chars);
+                }
+                self.push_advance(chars, next_state_id);
+                Ok(true)
+            }
             Ast::Repetition(repetition) => match repetition.op.kind {
                 RepetitionKind::ZeroOrOne => {
-                    self.expand_zero_or_one(&repetition.ast, next_state_id)
+                    self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
                 }
                 RepetitionKind::OneOrMore => {
-                    self.expand_one_or_more(&repetition.ast, next_state_id)
+                    self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
                 }
                 RepetitionKind::ZeroOrMore => {
-                    self.expand_zero_or_more(&repetition.ast, next_state_id)
+                    self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
                 }
                 RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
-                    self.expand_count(&repetition.ast, count, next_state_id)
+                    self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
                 }
                 RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
-                    if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
-                        self.expand_count(&repetition.ast, min, next_state_id)
+                    if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
+                        self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
                     } else {
                         Ok(false)
                     }
                 }
                 RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
-                    let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
+                    let mut result =
+                        self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
                     for _ in min..max {
                         if result {
                             next_state_id = self.nfa.last_state_id();
                         }
-                        if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
+                        if self.expand_zero_or_one(
+                            &repetition.ast,
+                            next_state_id,
+                            case_insensitive,
+                        )? {
                             result = true;
                         }
                     }
                     Ok(result)
                 }
             },
-            Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
+            Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
             Ast::Alternation(alternation) => {
                 let mut alternative_state_ids = Vec::new();
-                for ast in alternation.asts.iter() {
-                    if self.expand_regex(&ast, next_state_id)? {
+                for ast in &alternation.asts {
+                    if self.expand_regex(ast, next_state_id, case_insensitive)? {
                         alternative_state_ids.push(self.nfa.last_state_id());
                     } else {
                         alternative_state_ids.push(next_state_id);
@@ -304,7 +319,7 @@ impl NfaBuilder {
             Ast::Concat(concat) => {
                 let mut result = false;
                 for ast in concat.asts.iter().rev() {
-                    if self.expand_regex(&ast, next_state_id)? {
+                    if self.expand_regex(ast, next_state_id, case_insensitive)? {
                         result = true;
                         next_state_id = self.nfa.last_state_id();
                     }
@@ -316,7 +331,7 @@ impl NfaBuilder {
 
     fn translate_class_set(&self, class_set: &ClassSet) -> Result<CharacterSet> {
         match &class_set {
-            ClassSet::Item(item) => self.expand_character_class(&item),
+            ClassSet::Item(item) => self.expand_character_class(item),
             ClassSet::BinaryOp(binary_op) => {
                 let mut lhs_char_class = self.translate_class_set(&binary_op.lhs)?;
                 let mut rhs_char_class = self.translate_class_set(&binary_op.rhs)?;
@@ -335,13 +350,18 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
+    fn expand_one_or_more(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
         self.nfa.states.push(NfaState::Accept {
             variable_index: 0,
             precedence: 0,
         }); // Placeholder for split
         let split_state_id = self.nfa.last_state_id();
-        if self.expand_regex(&ast, split_state_id)? {
+        if self.expand_regex(ast, split_state_id, case_insensitive)? {
             self.nfa.states[split_state_id as usize] =
                 NfaState::Split(self.nfa.last_state_id(), next_state_id);
             Ok(true)
@@ -351,8 +371,13 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
-        if self.expand_regex(ast, next_state_id)? {
+    fn expand_zero_or_one(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        if self.expand_regex(ast, next_state_id, case_insensitive)? {
             self.push_split(next_state_id);
             Ok(true)
         } else {
@@ -360,8 +385,13 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
-        if self.expand_one_or_more(&ast, next_state_id)? {
+    fn expand_zero_or_more(
+        &mut self,
+        ast: &Ast,
+        next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
+        if self.expand_one_or_more(ast, next_state_id, case_insensitive)? {
            self.push_split(next_state_id);
            Ok(true)
        } else {
@@ -369,10 +399,16 @@ impl NfaBuilder {
         }
     }
 
-    fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
+    fn expand_count(
+        &mut self,
+        ast: &Ast,
+        count: u32,
+        mut next_state_id: u32,
+        case_insensitive: bool,
+    ) -> Result<bool> {
         let mut result = false;
         for _ in 0..count {
-            if self.expand_regex(ast, next_state_id)? {
+            if self.expand_regex(ast, next_state_id, case_insensitive)? {
                 result = true;
                 next_state_id = self.nfa.last_state_id();
             }
@@ -388,7 +424,7 @@ impl NfaBuilder {
             ClassSetItem::Union(union) => {
                 let mut result = CharacterSet::empty();
                 for item in &union.items {
-                    result = result.add(&self.expand_character_class(&item)?);
+                    result = result.add(&self.expand_character_class(item)?);
                 }
                 Ok(result)
             }
@@ -407,9 +443,8 @@ impl NfaBuilder {
             }
                 Ok(set)
             }
-            _ => Err(anyhow!(
-                "Regex error: Unsupported character class syntax {:?}",
-                item
+            ClassSetItem::Ascii(_) => Err(anyhow!(
+                "Regex error: Unsupported character class syntax {item:?}",
             )),
         }
     }
@@ -430,17 +465,17 @@ impl NfaBuilder {
         if actual_class_name.len() == 1 {
             category_letter = actual_class_name.clone();
         } else {
-            let code_points = UNICODE_CATEGORIES
-                .get(actual_class_name.as_str())
-                .or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
-                .ok_or_else(|| {
-                    anyhow!(
-                        "Regex error: Unsupported unicode character class {}",
-                        class_name
-                    )
-                })?;
+            let code_points =
+                UNICODE_CATEGORIES
+                    .get(actual_class_name.as_str())
+                    .or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
+                    .ok_or_else(|| {
+                        anyhow!(
+                            "Regex error: Unsupported unicode character class {class_name}",
+                        )
+                    })?;
             for c in code_points {
-                if let Some(c) = std::char::from_u32(*c) {
+                if let Some(c) = char::from_u32(*c) {
                     chars = chars.add_char(c);
                 }
             }
@@ -458,7 +493,7 @@ impl NfaBuilder {
         for (category, code_points) in UNICODE_CATEGORIES.iter() {
             if category.starts_with(&category_letter) {
                 for c in code_points {
-                    if let Some(c) = std::char::from_u32(*c) {
+                    if let Some(c) = char::from_u32(*c) {
                         chars = chars.add_char(c);
                     }
                 }
@@ -475,7 +510,9 @@ impl NfaBuilder {
             .add_char(' ')
             .add_char('\t')
             .add_char('\r')
-            .add_char('\n'),
+            .add_char('\n')
+            .add_char('\x0B')
+            .add_char('\x0C'),
         ClassPerlKind::Word => CharacterSet::empty()
             .add_char('_')
             .add_range('A', 'Z')
@@ -505,8 +542,10 @@ impl NfaBuilder {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::generate::grammars::Variable;
-    use crate::generate::nfa::{NfaCursor, NfaTransition};
+    use crate::generate::{
+        grammars::Variable,
+        nfa::{NfaCursor, NfaTransition},
+    };
 
     fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
         let start_states = grammar.variables.iter().map(|v| v.start_state).collect();
@@ -563,7 +602,7 @@ mod tests {
         let table = [
             // regex with sequences and alternatives
             Row {
-                rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
+                rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
                 separators: vec![],
                 examples: vec![
                     ("ade1", Some((0, "ade"))),
@@ -574,13 +613,13 @@ mod tests {
             },
             // regex with repeats
             Row {
-                rules: vec![Rule::pattern("a*")],
+                rules: vec![Rule::pattern("a*", "")],
                 separators: vec![],
                 examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
             },
             // regex with repeats in sequences
             Row {
-                rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
+                rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
                 separators: vec![],
                 examples: vec![
                     ("af1", Some((0, "af"))),
@@ -591,13 +630,13 @@ mod tests {
             },
             // regex with character ranges
             Row {
-                rules:
vec![Rule::pattern("[a-fA-F0-9]+")], + rules: vec![Rule::pattern("[a-fA-F0-9]+", "")], separators: vec![], examples: vec![("A1ff0.", Some((0, "A1ff0")))], }, // regex with perl character classes Row { - rules: vec![Rule::pattern("\\w\\d\\s")], + rules: vec![Rule::pattern("\\w\\d\\s", "")], separators: vec![], examples: vec![("_0 ", Some((0, "_0 ")))], }, @@ -611,7 +650,7 @@ mod tests { Row { rules: vec![Rule::repeat(Rule::seq(vec![ Rule::string("{"), - Rule::pattern("[a-f]+"), + Rule::pattern("[a-f]+", ""), Rule::string("}"), ]))], separators: vec![], @@ -624,9 +663,9 @@ mod tests { // longest match rule Row { rules: vec![ - Rule::pattern("a|bc"), - Rule::pattern("aa"), - Rule::pattern("bcd"), + Rule::pattern("a|bc", ""), + Rule::pattern("aa", ""), + Rule::pattern("bcd", ""), ], separators: vec![], examples: vec![ @@ -640,7 +679,7 @@ mod tests { }, // regex with an alternative including the empty string Row { - rules: vec![Rule::pattern("a(b|)+c")], + rules: vec![Rule::pattern("a(b|)+c", "")], separators: vec![], examples: vec![ ("ac.", Some((0, "ac"))), @@ -650,8 +689,8 @@ mod tests { }, // separators Row { - rules: vec![Rule::pattern("[a-f]+")], - separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + rules: vec![Rule::pattern("[a-f]+", "")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ (" a", Some((0, "a"))), (" \nb", Some((0, "b"))), @@ -662,11 +701,11 @@ mod tests { // shorter tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(2), Rule::pattern("abc")), - Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")), - Rule::pattern("[a-e]+"), + Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")), + Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")), + Rule::pattern("[a-e]+", ""), ], - separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ ("abceef", Some((0, "abc"))), ("abdeef", Some((1, "abde"))), @@ -676,13 +715,13 @@ mod tests { // immediate tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")), + Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")), Rule::immediate_token(Rule::prec( Precedence::Integer(2), - Rule::pattern("[^ab]+"), + Rule::pattern("[^ab]+", ""), )), ], - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], examples: vec![("cccb", Some((1, "ccc")))], }, Row { @@ -704,7 +743,7 @@ mod tests { // nested choices within sequences Row { rules: vec![Rule::seq(vec![ - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), Rule::choice(vec![ Rule::Blank, Rule::choice(vec![Rule::seq(vec![ @@ -713,7 +752,7 @@ mod tests { Rule::Blank, Rule::choice(vec![Rule::string("+"), Rule::string("-")]), ]), - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), ])]), ]), ])], @@ -730,7 +769,7 @@ mod tests { }, // nested groups Row { - rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])], + rules: vec![Rule::seq(vec![Rule::pattern(r"([^x\\]|\\(.|\n))+", "")])], separators: vec![], examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))], }, @@ -738,24 +777,24 @@ mod tests { Row { rules: vec![ // Escaped forward slash (used in JS because '/' is the regex delimiter) - Rule::pattern(r#"\/"#), + Rule::pattern(r"\/", ""), // Escaped quotes - Rule::pattern(r#"\"\'"#), + Rule::pattern(r#"\"\'"#, ""), // Quote preceded by a literal backslash - Rule::pattern(r#"[\\']+"#), 
+ Rule::pattern(r"[\\']+", ""), ], separators: vec![], examples: vec![ ("/", Some((0, "/"))), ("\"\'", Some((1, "\"\'"))), - (r#"'\'a"#, Some((2, r#"'\'"#))), + (r"'\'a", Some((2, r"'\'"))), ], }, // unicode property escapes Row { rules: vec![ - Rule::pattern(r#"\p{L}+\P{L}+"#), - Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#), + Rule::pattern(r"\p{L}+\P{L}+", ""), + Rule::pattern(r"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*", ""), ], separators: vec![], examples: vec![ @@ -765,17 +804,17 @@ mod tests { }, // unicode property escapes in bracketed sets Row { - rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)], + rules: vec![Rule::pattern(r"[\p{L}\p{Nd}]+", "")], separators: vec![], examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))], }, // unicode character escapes Row { rules: vec![ - Rule::pattern(r#"\u{00dc}"#), - Rule::pattern(r#"\U{000000dd}"#), - Rule::pattern(r#"\u00de"#), - Rule::pattern(r#"\U000000df"#), + Rule::pattern(r"\u{00dc}", ""), + Rule::pattern(r"\U{000000dd}", ""), + Rule::pattern(r"\u00de", ""), + Rule::pattern(r"\U000000df", ""), ], separators: vec![], examples: vec![ @@ -785,17 +824,15 @@ mod tests { ("\u{00df}", Some((3, "\u{00df}"))), ], }, - // allowing un-escaped curly braces Row { rules: vec![ - // Un-escaped curly braces - Rule::pattern(r#"u{[0-9a-fA-F]+}"#), + Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""), // Already-escaped curly braces - Rule::pattern(r#"\{[ab]{3}\}"#), + Rule::pattern(r"\{[ab]{3}\}", ""), // Unicode codepoints - Rule::pattern(r#"\u{1000A}"#), + Rule::pattern(r"\u{1000A}", ""), // Unicode codepoints (lowercase) - Rule::pattern(r#"\u{1000b}"#), + Rule::pattern(r"\u{1000b}", ""), ], separators: vec![], examples: vec![ @@ -807,7 +844,7 @@ mod tests { }, // Emojis Row { - rules: vec![Rule::pattern(r"\p{Emoji}+")], + rules: vec![Rule::pattern(r"\p{Emoji}+", "")], separators: vec![], examples: vec![ ("🐎", Some((0, "🐎"))), @@ -820,7 +857,7 @@ mod tests { }, // Intersection Row { - rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")], separators: vec![], examples: vec![ ("456", Some((0, "456"))), @@ -833,7 +870,7 @@ mod tests { }, // Difference Row { - rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")], + rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -846,7 +883,7 @@ mod tests { }, // Symmetric difference Row { - rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -867,7 +904,7 @@ mod tests { // [6-7]: y y // [3-9]--[5-7]: y y y y y // final regex: y y y y y y - rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")], + rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")], separators: vec![], examples: vec![ ("01", Some((0, "01"))), @@ -889,13 +926,13 @@ mod tests { let grammar = expand_tokens(ExtractedLexicalGrammar { separators: separators.clone(), variables: rules - .into_iter() + .iter() .map(|rule| Variable::named("", rule.clone())) .collect(), }) .unwrap(); - for (haystack, needle) in examples.iter() { + for (haystack, needle) in examples { assert_eq!(simulate_nfa(&grammar, haystack), *needle); } } diff --git a/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/cli/src/generate/prepare_grammar/extract_default_aliases.rs index d39bf8d..6831791 100644 --- a/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ 
b/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -1,5 +1,7 @@ -use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar}; -use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType}; +use crate::generate::{ + grammars::{LexicalGrammar, SyntaxGrammar}, + rules::{Alias, AliasMap, Symbol, SymbolType}, +}; #[derive(Clone, Default)] struct SymbolStatus { @@ -14,8 +16,8 @@ struct SymbolStatus { // This has two benefits: // * It reduces the overhead of storing production-specific alias info in the parse table. // * Within an `ERROR` node, no context-specific aliases will be applied. This transformation -// ensures that the children of an `ERROR` node have symbols that are consistent with the -// way that they would appear in a valid syntax tree. +// ensures that the children of an `ERROR` node have symbols that are consistent with the way that +// they would appear in a valid syntax tree. pub(super) fn extract_default_aliases( syntax_grammar: &mut SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -28,10 +30,10 @@ pub(super) fn extract_default_aliases( // For each grammar symbol, find all of the aliases under which the symbol appears, // and determine whether or not the symbol ever appears *unaliased*. - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol.kind { + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + let status = match step.symbol.kind { SymbolType::External => &mut external_status_list[step.symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], @@ -62,8 +64,8 @@ pub(super) fn extract_default_aliases( } } - for symbol in syntax_grammar.extra_symbols.iter() { - let mut status = match symbol.kind { + for symbol in &syntax_grammar.extra_symbols { + let status = match symbol.kind { SymbolType::External => &mut external_status_list[symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index], SymbolType::Terminal => &mut terminal_status_list[symbol.index], @@ -98,25 +100,23 @@ pub(super) fn extract_default_aliases( for (symbol, status) in symbols_with_statuses { if status.appears_unaliased { status.aliases.clear(); - } else { - if let Some(default_entry) = status - .aliases - .iter() - .enumerate() - .max_by_key(|(i, (_, count))| (count, -(*i as i64))) - .map(|(_, entry)| entry.clone()) - { - status.aliases.clear(); - status.aliases.push(default_entry.clone()); - result.insert(symbol, default_entry.0); - } + } else if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, -(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); } } // Wherever a symbol is aliased as its default alias, remove the usage of the alias, // because it will now be redundant. let mut alias_positions_to_clear = Vec::new(); - for variable in syntax_grammar.variables.iter_mut() { + for variable in &mut syntax_grammar.variables { alias_positions_to_clear.clear(); for (i, production) in variable.productions.iter().enumerate() { @@ -132,7 +132,7 @@ pub(super) fn extract_default_aliases( // If this step is aliased as the symbol's default alias, then remove that alias. 
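// For example (illustrative, hypothetical grammar): if `expression` appears
// only under `alias($.expression, $.expr)`, then `expr` is recorded above as
// the default alias for `expression`, and a per-step alias that matches it,
// checked below, carries no extra information and can be dropped.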
if step.alias.is_some() - && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + && step.alias.as_ref() == status.aliases.first().map(|t| &t.0) { let mut other_productions_must_use_this_alias_at_this_index = false; for (other_i, other_production) in variable.productions.iter().enumerate() { @@ -164,10 +164,10 @@ pub(super) fn extract_default_aliases( #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + use crate::generate::{ + grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType}, + nfa::Nfa, }; - use crate::generate::nfa::Nfa; #[test] fn test_extract_simple_aliases() { diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 928f914..f9aa1bc 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -1,9 +1,12 @@ -use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; -use crate::generate::grammars::{ExternalToken, Variable, VariableType}; -use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType}; +use std::{collections::HashMap, mem}; + use anyhow::{anyhow, Result}; -use std::collections::HashMap; -use std::mem; + +use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar}; +use crate::generate::{ + grammars::{ExternalToken, Variable, VariableType}, + rules::{MetadataParams, Rule, Symbol, SymbolType}, +}; pub(super) fn extract_tokens( mut grammar: InternedGrammar, @@ -15,12 +18,12 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; - for mut variable in grammar.variables.iter_mut() { - extractor.extract_tokens_in_variable(&mut variable); + for variable in &mut grammar.variables { + extractor.extract_tokens_in_variable(variable); } - for mut variable in grammar.external_tokens.iter_mut() { - extractor.extract_tokens_in_variable(&mut variable); + for variable in &mut grammar.external_tokens { + extractor.extract_tokens_in_variable(variable); } let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len()); @@ -49,7 +52,7 @@ pub(super) fn extract_tokens( }) = variable.rule { if i > 0 && extractor.extracted_usage_counts[index] == 1 { - let mut lexical_variable = &mut lexical_variables[index]; + let lexical_variable = &mut lexical_variables[index]; lexical_variable.kind = variable.kind; lexical_variable.name = variable.name; symbol_replacer.replacements.insert(i, index); @@ -59,7 +62,7 @@ pub(super) fn extract_tokens( variables.push(variable); } - for variable in variables.iter_mut() { + for variable in &mut variables { variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule); } @@ -67,10 +70,10 @@ pub(super) fn extract_tokens( .expected_conflicts .into_iter() .map(|conflict| { - let mut result: Vec<_> = conflict + let mut result = conflict .iter() .map(|symbol| symbol_replacer.replace_symbol(*symbol)) - .collect(); + .collect::<Vec<_>>(); result.sort_unstable(); result.dedup(); result @@ -94,12 +97,10 @@ pub(super) fn extract_tokens( for rule in grammar.extra_symbols { if let Rule::Symbol(symbol) = rule { extra_symbols.push(symbol_replacer.replace_symbol(symbol)); + } else if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { + extra_symbols.push(Symbol::terminal(index)); } else { - if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { -
extra_symbols.push(Symbol::terminal(index)); - } else { - separators.push(rule); - } + separators.push(rule); } } @@ -119,13 +120,13 @@ pub(super) fn extract_tokens( name: external_token.name, kind: external_token.kind, corresponding_internal_token: None, - }) + }); } else { external_tokens.push(ExternalToken { name: lexical_variables[symbol.index].name.clone(), kind: external_token.kind, corresponding_internal_token: Some(symbol), - }) + }); } } else { return Err(anyhow!( @@ -209,7 +210,7 @@ impl TokenExtractor { } else { Rule::Metadata { params: params.clone(), - rule: Box::new(self.extract_tokens_in_rule((&rule).clone())), + rule: Box::new(self.extract_tokens_in_rule(rule)), } } } @@ -298,20 +299,19 @@ impl SymbolReplacer { } let mut adjusted_index = symbol.index; - for (replaced_index, _) in self.replacements.iter() { + for replaced_index in self.replacements.keys() { if *replaced_index < symbol.index { adjusted_index -= 1; } } - return Symbol::non_terminal(adjusted_index); + Symbol::non_terminal(adjusted_index) } } #[cfg(test)] mod test { use super::*; - use crate::generate::grammars::VariableType; #[test] fn test_extraction() { @@ -320,7 +320,7 @@ mod test { "rule_0", Rule::repeat(Rule::seq(vec![ Rule::string("a"), - Rule::pattern("b"), + Rule::pattern("b", ""), Rule::choice(vec![ Rule::non_terminal(1), Rule::non_terminal(2), @@ -331,8 +331,8 @@ mod test { ]), ])), ), - Variable::named("rule_1", Rule::pattern("e")), - Variable::named("rule_2", Rule::pattern("b")), + Variable::named("rule_1", Rule::pattern("e", "")), + Variable::named("rule_2", Rule::pattern("b", "")), Variable::named( "rule_3", Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]), @@ -378,12 +378,12 @@ mod test { lexical_grammar.variables, vec![ Variable::anonymous("a", Rule::string("a")), - Variable::auxiliary("rule_0_token1", Rule::pattern("b")), + Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")), Variable::auxiliary( "rule_0_token2", Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),])) ), - Variable::named("rule_1", Rule::pattern("e")), + Variable::named("rule_1", Rule::pattern("e", "")), ] ); } @@ -404,14 +404,14 @@ mod test { assert_eq!( lexical_grammar.variables, vec![Variable::anonymous("hello", Rule::string("hello")),] - ) + ); } #[test] fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), - Variable::named("comment", Rule::pattern("//.*")), + Variable::named("comment", Rule::pattern("//.*", "")), ]); grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index e9950e1..4b707be 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -1,9 +1,10 @@ +use anyhow::{anyhow, Result}; + use super::ExtractedSyntaxGrammar; -use crate::generate::grammars::{ - Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable, +use crate::generate::{ + grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable}, + rules::{Alias, Associativity, Precedence, Rule, Symbol}, }; -use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol}; -use anyhow::{anyhow, Result}; struct RuleFlattener { production: Production, @@ -88,7 +89,7 @@ impl RuleFlattener { self.associativity_stack.pop(); if did_push && !at_end { self.production.steps.last_mut().unwrap().associativity = - 
self.associativity_stack.last().cloned(); + self.associativity_stack.last().copied(); } } @@ -110,7 +111,7 @@ impl RuleFlattener { .last() .cloned() .unwrap_or(Precedence::None), - associativity: self.associativity_stack.last().cloned(), + associativity: self.associativity_stack.last().copied(), alias: self.alias_stack.last().cloned(), field_name: self.field_name_stack.last().cloned(), }); @@ -129,7 +130,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> { let extraction = extract_choices(element); let mut next_result = Vec::new(); for entry in result { - for extraction_entry in extraction.iter() { + for extraction_entry in &extraction { next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()])); } } @@ -157,7 +158,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> { } } -fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> { +fn flatten_variable(variable: Variable) -> SyntaxVariable { let mut productions = Vec::new(); for rule in extract_choices(variable.rule) { let production = RuleFlattener::new().flatten(rule); @@ -165,14 +166,14 @@ fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> { productions.push(production); } } - Ok(SyntaxVariable { + SyntaxVariable { name: variable.name, kind: variable.kind, productions, - }) + } } -fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool { +fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool { for variable in variables { for production in &variable.productions { for step in &production.steps { @@ -188,7 +189,7 @@ fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool { pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> { let mut variables = Vec::new(); for variable in grammar.variables { - variables.push(flatten_variable(variable)?); + variables.push(flatten_variable(variable)); } for (i, variable) in variables.iter().enumerate() { for production in &variable.productions { @@ -220,7 +221,6 @@ unless they are used only as the grammar's start rule.
mod tests { use super::*; use crate::generate::grammars::VariableType; - use crate::generate::rules::Symbol; #[test] fn test_flatten_grammar() { @@ -245,8 +245,7 @@ mod tests { ), Rule::non_terminal(7), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -304,8 +303,7 @@ mod tests { ), Rule::non_terminal(7), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -344,8 +342,7 @@ mod tests { Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), ), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -367,8 +364,7 @@ mod tests { Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1)]), ), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -393,8 +389,7 @@ mod tests { Rule::field("second-thing".to_string(), Rule::terminal(3)), ]), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 5cd29cc..707aa86 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -1,8 +1,11 @@ -use super::InternedGrammar; -use crate::generate::grammars::{InputGrammar, Variable, VariableType}; -use crate::generate::rules::{Rule, Symbol}; use anyhow::{anyhow, Result}; +use super::InternedGrammar; +use crate::generate::{ + grammars::{InputGrammar, Variable, VariableType}, + rules::{Rule, Symbol}, +}; + pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> { let interner = Interner { grammar }; @@ -11,7 +14,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> } let mut variables = Vec::with_capacity(grammar.variables.len()); - for variable in grammar.variables.iter() { + for variable in &grammar.variables { variables.push(Variable { name: variable.name.clone(), kind: variable_type_for_name(&variable.name), @@ -20,10 +23,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> } let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len()); - for external_token in grammar.external_tokens.iter() { - let rule = interner.intern_rule(&external_token)?; + for external_token in &grammar.external_tokens { + let rule = interner.intern_rule(external_token)?; let (name, kind) = if let Rule::NamedSymbol(name) = external_token { - (name.clone(), variable_type_for_name(&name)) + (name.clone(), variable_type_for_name(name)) } else { (String::new(), VariableType::Anonymous) }; @@ -31,35 +34,35 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> } let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); - for extra_token in grammar.extra_symbols.iter() { + for extra_token in &grammar.extra_symbols { extra_symbols.push(interner.intern_rule(extra_token)?); } let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); - for supertype_symbol_name in grammar.supertype_symbols.iter() { + for supertype_symbol_name in &grammar.supertype_symbols { supertype_symbols.push( interner .intern_name(supertype_symbol_name) - .ok_or_else(|| anyhow!("Undefined symbol `{}`", supertype_symbol_name))?, + .ok_or_else(|| anyhow!("Undefined symbol `{supertype_symbol_name}`"))?, ); } let mut expected_conflicts = Vec::new(); - for conflict in grammar.expected_conflicts.iter() { + for conflict in &grammar.expected_conflicts { let mut interned_conflict = Vec::with_capacity(conflict.len()); for name in conflict { interned_conflict.push( interner - .intern_name(&name) - .ok_or_else(|| 
anyhow!("Undefined symbol `{}`", name))?, + .intern_name(name) + .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?, ); } expected_conflicts.push(interned_conflict); } let mut variables_to_inline = Vec::new(); - for name in grammar.variables_to_inline.iter() { - if let Some(symbol) = interner.intern_name(&name) { + for name in &grammar.variables_to_inline { + if let Some(symbol) = interner.intern_name(name) { variables_to_inline.push(symbol); } } @@ -68,8 +71,8 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> if let Some(name) = grammar.word_token.as_ref() { word_token = Some( interner - .intern_name(&name) - .ok_or_else(|| anyhow!("Undefined symbol `{}`", &name))?, + .intern_name(name) + .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?, ); } @@ -118,13 +121,10 @@ impl<'a> Interner<'a> { params: params.clone(), }), - Rule::NamedSymbol(name) => { - if let Some(symbol) = self.intern_name(&name) { - Ok(Rule::Symbol(symbol)) - } else { - Err(anyhow!("Undefined symbol `{}`", name)) - } - } + Rule::NamedSymbol(name) => self.intern_name(name).map_or_else( + || Err(anyhow!("Undefined symbol `{name}`")), + |symbol| Ok(Rule::Symbol(symbol)), + ), _ => Ok(rule.clone()), } @@ -145,12 +145,12 @@ impl<'a> Interner<'a> { } } - return None; + None } } fn variable_type_for_name(name: &str) -> VariableType { - if name.starts_with("_") { + if name.starts_with('_') { VariableType::Hidden } else { VariableType::Named diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 51b32cc..7e4800c 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -6,27 +6,29 @@ mod flatten_grammar; mod intern_symbols; mod process_inlines; -pub(crate) use self::expand_tokens::expand_tokens; - -use self::expand_repeats::expand_repeats; -use self::extract_default_aliases::extract_default_aliases; -use self::extract_tokens::extract_tokens; -use self::flatten_grammar::flatten_grammar; -use self::intern_symbols::intern_symbols; -use self::process_inlines::process_inlines; -use super::grammars::{ - ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry, - SyntaxGrammar, Variable, -}; -use super::rules::{AliasMap, Precedence, Rule, Symbol}; -use anyhow::{anyhow, Result}; use std::{ cmp::Ordering, collections::{hash_map, HashMap, HashSet}, mem, }; -pub(crate) struct IntermediateGrammar<T, U> { +use anyhow::{anyhow, Result}; + +pub use self::expand_tokens::expand_tokens; +use self::{ expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases, extract_tokens::extract_tokens, flatten_grammar::flatten_grammar, intern_symbols::intern_symbols, process_inlines::process_inlines, }; +use super::{ grammars::{ ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, Variable, }, rules::{AliasMap, Precedence, Rule, Symbol}, }; + +pub struct IntermediateGrammar<T, U> { variables: Vec<Variable>, extra_symbols: Vec<T>, expected_conflicts: Vec<Vec<Symbol>>, @@ -37,12 +39,12 @@ pub(crate) struct IntermediateGrammar<T, U> { word_token: Option<Symbol>, } -pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>; +pub type InternedGrammar = IntermediateGrammar<Rule, Variable>; -pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>; +pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>; #[derive(Debug, PartialEq, Eq)] -pub(crate) struct ExtractedLexicalGrammar { +pub struct ExtractedLexicalGrammar { pub variables: Vec<Variable>, pub separators: Vec<Rule>, } @@ -50,21 +52,21 @@ pub(crate) struct
ExtractedLexicalGrammar { impl<T, U> Default for IntermediateGrammar<T, U> { fn default() -> Self { Self { - variables: Default::default(), - extra_symbols: Default::default(), - expected_conflicts: Default::default(), - precedence_orderings: Default::default(), - external_tokens: Default::default(), - variables_to_inline: Default::default(), - supertype_symbols: Default::default(), - word_token: Default::default(), + variables: Vec::default(), + extra_symbols: Vec::default(), + expected_conflicts: Vec::default(), + precedence_orderings: Vec::default(), + external_tokens: Vec::default(), + variables_to_inline: Vec::default(), + supertype_symbols: Vec::default(), + word_token: Option::default(), } } } /// Transform an input grammar into separate components that are ready /// for parse table construction. -pub(crate) fn prepare_grammar( +pub fn prepare_grammar( input_grammar: &InputGrammar, ) -> Result<( SyntaxGrammar, @@ -109,9 +111,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { hash_map::Entry::Occupied(e) => { if e.get() != &ordering { return Err(anyhow!( - "Conflicting orderings for precedences {} and {}", - entry1, - entry2 + "Conflicting orderings for precedences {entry1} and {entry2}", )); } } @@ -127,16 +127,11 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { Rule::Repeat(rule) => validate(rule_name, rule, names), Rule::Seq(elements) | Rule::Choice(elements) => elements .iter() - .map(|e| validate(rule_name, e, names)) - .collect(), + .try_for_each(|e| validate(rule_name, e, names)), Rule::Metadata { rule, params } => { if let Precedence::Name(n) = &params.precedence { if !names.contains(n) { - return Err(anyhow!( - "Undeclared precedence '{}' in rule '{}'", - n, - rule_name - )); + return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'")); } } validate(rule_name, rule, names)?; @@ -168,7 +163,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + use crate::generate::grammars::VariableType; #[test] fn test_validate_precedences_with_undeclared_precedence() { diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 206ef8d..ec43dd6 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -1,14 +1,16 @@ +use std::collections::HashMap; + +use anyhow::{anyhow, Result}; + use crate::generate::{ grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar}, rules::SymbolType, }; -use anyhow::{anyhow, Result}; -use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProductionStepId { // A `None` value here means that the production itself was produced via inlining, - // and is stored in the the builder's `productions` vector, as opposed to being + // and is stored in the builder's `productions` vector, as opposed to being // stored in one of the grammar's variables.
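// Illustrative reading of this id scheme: `variable_index: Some(3)` points at
// `grammar.variables[3].productions[production_index]`, while `None` points at
// the builder's own `productions[production_index]`, i.e. a production that was
// synthesized by an earlier inlining step.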
variable_index: Option<usize>, production_index: usize, @@ -21,7 +23,7 @@ struct InlinedProductionMapBuilder { } impl InlinedProductionMapBuilder { - fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap { + fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap { let mut step_ids_to_process = Vec::new(); for (variable_index, variable) in grammar.variables.iter().enumerate() { for production_index in 0..variable.productions.len() { @@ -38,14 +40,14 @@ impl InlinedProductionMapBuilder { if grammar.variables_to_inline.contains(&step.symbol) { let inlined_step_ids = self .inline_production_at_step(step_id, grammar) - .into_iter() - .cloned() + .iter() + .copied() .map(|production_index| ProductionStepId { variable_index: None, production_index, step_index: step_id.step_index, }); - step_ids_to_process.splice(i..i + 1, inlined_step_ids); + step_ids_to_process.splice(i..=i, inlined_step_ids); } else { step_ids_to_process[i] = ProductionStepId { variable_index: step_id.variable_index, @@ -67,11 +69,12 @@ impl InlinedProductionMapBuilder { let production_map = production_indices_by_step_id .into_iter() .map(|(step_id, production_indices)| { - let production = if let Some(variable_index) = step_id.variable_index { - &grammar.variables[variable_index].productions[step_id.production_index] - } else { - &productions[step_id.production_index] - } as *const Production; + let production = step_id.variable_index.map_or_else( + || &productions[step_id.production_index], + |variable_index| { + &grammar.variables[variable_index].productions[step_id.production_index] + }, + ) as *const Production; ((production, step_id.step_index as u32), production_indices) }) .collect(); @@ -86,29 +89,29 @@ impl InlinedProductionMapBuilder { &'a mut self, step_id: ProductionStepId, grammar: &'a SyntaxGrammar, - ) -> &'a Vec<usize> { + ) -> &'a [usize] { // Build a list of productions produced by inlining rules. let mut i = 0; let step_index = step_id.step_index; let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()]; while i < productions_to_add.len() { if let Some(step) = productions_to_add[i].steps.get(step_index) { - let symbol = step.symbol.clone(); + let symbol = step.symbol; if grammar.variables_to_inline.contains(&symbol) { // Remove the production from the vector, replacing it with a placeholder. let production = productions_to_add - .splice(i..i + 1, [Production::default()].iter().cloned()) + .splice(i..=i, std::iter::once(&Production::default()).cloned()) .next() .unwrap(); // Replace the placeholder with the inlined productions.
productions_to_add.splice( - i..i + 1, + i..=i, grammar.variables[symbol.index].productions.iter().map(|p| { let mut production = production.clone(); let removed_step = production .steps - .splice(step_index..(step_index + 1), p.steps.iter().cloned()) + .splice(step_index..=step_index, p.steps.iter().cloned()) .next() .unwrap(); let inserted_steps = @@ -127,7 +130,7 @@ impl InlinedProductionMapBuilder { if last_inserted_step.precedence.is_none() { last_inserted_step.precedence = removed_step.precedence; } - if last_inserted_step.associativity == None { + if last_inserted_step.associativity.is_none() { last_inserted_step.associativity = removed_step.associativity; } } @@ -151,7 +154,7 @@ impl InlinedProductionMapBuilder { self.productions .iter() .position(|p| *p == production) - .unwrap_or({ + .unwrap_or_else(|| { self.productions.push(production); self.productions.len() - 1 }) @@ -169,11 +172,10 @@ impl InlinedProductionMapBuilder { id: ProductionStepId, grammar: &'a SyntaxGrammar, ) -> &'a Production { - if let Some(variable_index) = id.variable_index { - &grammar.variables[variable_index].productions[id.production_index] - } else { - &self.productions[id.production_index] - } + id.variable_index.map_or_else( + || &self.productions[id.production_index], + |variable_index| &grammar.variables[variable_index].productions[id.production_index], + ) } fn production_step_for_id<'a>( @@ -203,6 +205,12 @@ pub(super) fn process_inlines( lexical_grammar.variables[symbol.index].name, )) } + SymbolType::NonTerminal if symbol.index == 0 => { + return Err(anyhow!( + "Rule `{}` cannot be inlined because it is the first rule", + grammar.variables[symbol.index].name, + )) + } _ => {} } } @@ -217,10 +225,10 @@ pub(super) fn process_inlines( #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{ - LexicalVariable, ProductionStep, SyntaxVariable, VariableType, + use crate::generate::{ + grammars::{LexicalVariable, SyntaxVariable, VariableType}, + rules::{Associativity, Precedence, Symbol}, }; - use crate::generate::rules::{Associativity, Precedence, Symbol}; #[test] fn test_basic_inlining() { @@ -260,7 +268,7 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); // Nothing to inline at step 0. 
assert!(inline_map @@ -356,15 +364,15 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - let productions: Vec<&Production> = inline_map + let productions = inline_map .inlined_productions(&grammar.variables[0].productions[0], 1) .unwrap() - .collect(); + .collect::<Vec<_>>(); assert_eq!( - productions.iter().cloned().cloned().collect::<Vec<_>>(), + productions.iter().copied().cloned().collect::<Vec<_>>(), vec![ Production { dynamic_precedence: 0, @@ -455,15 +463,15 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - let productions: Vec<_> = inline_map + let productions = inline_map .inlined_productions(&grammar.variables[0].productions[0], 0) .unwrap() - .collect(); + .collect::<Vec<_>>(); assert_eq!( - productions.iter().cloned().cloned().collect::<Vec<_>>(), + productions.iter().copied().cloned().collect::<Vec<_>>(), vec![Production { dynamic_precedence: 0, steps: vec![ diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index cb9f6c7..8bf72c3 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1,21 +1,21 @@ +use std::{ + cmp, + collections::{HashMap, HashSet}, + fmt::Write, + mem::swap, +}; + use super::{ - char_tree::{CharacterTree, Comparator}, + build_tables::Tables, grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}, + nfa::CharacterSet, rules::{Alias, AliasMap, Symbol, SymbolType}, tables::{ AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, }, }; -use core::ops::Range; -use std::{ cmp, collections::{HashMap, HashSet}, fmt::Write, mem::swap, }; -const LARGE_CHARACTER_RANGE_COUNT: usize = 8; const SMALL_STATE_THRESHOLD: usize = 64; const ABI_VERSION_MIN: usize = 13; const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION; @@ -28,7 +28,7 @@ macro_rules! add { } macro_rules! add_whitespace { - ($this: tt) => {{ + ($this:tt) => {{ for _ in 0..$this.indent_level { write!(&mut $this.buffer, " ").unwrap(); } @@ -44,13 +44,13 @@ macro_rules! add_line { } macro_rules! indent { - ($this: tt) => { + ($this:tt) => { $this.indent_level += 1; }; } macro_rules!
dedent { - ($this: tt) => { + ($this:tt) => { assert_ne!($this.indent_level, 0); $this.indent_level -= 1; }; } @@ -63,6 +63,8 @@ struct Generator { parse_table: ParseTable, main_lex_table: LexTable, keyword_lex_table: LexTable, + large_character_sets: Vec<(Option<Symbol>, CharacterSet)>, + large_character_set_info: Vec<LargeCharacterSetInfo>, large_state_count: usize, keyword_capture_token: Option<Symbol>, syntax_grammar: SyntaxGrammar, @@ -79,16 +81,9 @@ struct Generator { abi_version: usize, } -struct TransitionSummary { - is_included: bool, - ranges: Vec<Range<char>>, - call_id: Option<usize>, -} struct LargeCharacterSetInfo { - ranges: Vec<Range<char>>, - symbol: Symbol, - index: usize, + constant_name: String, + is_used: bool, } impl Generator { @@ -118,17 +113,30 @@ impl Generator { self.add_primary_state_id_list(); } + let buffer_offset_before_lex_functions = self.buffer.len(); + let mut main_lex_table = LexTable::default(); swap(&mut main_lex_table, &mut self.main_lex_table); - self.add_lex_function("ts_lex", main_lex_table, true); + self.add_lex_function("ts_lex", main_lex_table); if self.keyword_capture_token.is_some() { let mut keyword_lex_table = LexTable::default(); swap(&mut keyword_lex_table, &mut self.keyword_lex_table); - self.add_lex_function("ts_lex_keywords", keyword_lex_table, false); + self.add_lex_function("ts_lex_keywords", keyword_lex_table); + } + + // Once the lex functions are generated, and we've determined which large + // character sets are actually used, we can generate the large character set + // constants. Insert them into the output buffer before the lex functions. + let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string(); + self.buffer.truncate(buffer_offset_before_lex_functions); + for ix in 0..self.large_character_sets.len() { + self.add_character_set(ix); } + self.buffer.push_str(&lex_functions); self.add_lex_modes_list(); + self.add_parse_table(); if !self.syntax_grammar.external_tokens.is_empty() { self.add_external_token_enum(); @@ -136,7 +144,6 @@ impl Generator { self.add_external_scanner_states_list(); } - self.add_parse_table(); self.add_parser_export(); self.buffer @@ -152,54 +159,56 @@ impl Generator { self.symbol_ids[&Symbol::end()].clone(), ); - self.symbol_map = self - .parse_table - .symbols - .iter() - .map(|symbol| { - let mut mapping = symbol; - - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.default_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.default_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; - } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + self.symbol_map = HashMap::new(); + + for symbol in &self.parse_table.symbols { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. Otherwise, pick the symbol with the lowest + // numeric value.
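// For example (illustrative, hypothetical grammar): if `$.method_call` is
// always written as `alias($.method_call, $.call)` while a plain `$.call`
// rule also exists, both symbols carry the metadata ("call", named), so the
// logic below maps them onto a single public-facing `call` symbol, preferring
// the unaliased one when it exists.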
+ if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { mapping = other_symbol; - break; } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; } } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + if let Some(mapped) = self.symbol_map.get(other_symbol) { + if mapped == symbol { + break; + } } + mapping = other_symbol; + break; } } + } - (*symbol, *mapping) - }) - .collect(); + self.symbol_map.insert(*symbol, *mapping); + } for production_info in &self.parse_table.production_infos { // Build a list of all field names for field_name in production_info.field_map.keys() { - if let Err(i) = self.field_names.binary_search(&field_name) { + if let Err(i) = self.field_names.binary_search(field_name) { self.field_names.insert(i, field_name.clone()); } } @@ -207,38 +216,56 @@ impl Generator { for alias in &production_info.alias_sequence { // Generate a mapping from aliases to C identifiers. if let Some(alias) = &alias { - let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - if let Some(default_alias) = self.default_aliases.get(symbol) { - default_alias == alias - } else { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias.kind() - } + let existing_symbol = self.parse_table.symbols.iter().copied().find(|symbol| { + self.default_aliases.get(symbol).map_or_else( + || { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + }, + |default_alias| default_alias == alias, + ) }); // Some aliases match an existing symbol in the grammar. - let alias_id; - if let Some(existing_symbol) = existing_symbol { - alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone(); + let alias_id = if let Some(existing_symbol) = existing_symbol { + self.symbol_ids[&self.symbol_map[&existing_symbol]].clone() } - // Other aliases don't match any existing symbol, and need their own identifiers. + // Other aliases don't match any existing symbol, and need their own + // identifiers. 
else { if let Err(i) = self.unique_aliases.binary_search(alias) { self.unique_aliases.insert(i, alias.clone()); } - alias_id = if alias.is_named { + if alias.is_named { format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) } else { format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - } + } + }; self.alias_ids.entry(alias.clone()).or_insert(alias_id); } } } + for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() { + let count = self.large_character_sets[0..ix] + .iter() + .filter(|(sym, _)| sym == symbol) + .count() + + 1; + let constant_name = if let Some(symbol) = symbol { + format!("{}_character_set_{}", self.symbol_ids[symbol], count) + } else { + format!("extras_character_set_{}", count) + }; + self.large_character_set_info.push(LargeCharacterSetInfo { + constant_name, + is_used: false, + }); + } + // Determine which states should use the "small state" representation, and which should // use the normal array representation. let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2); @@ -254,13 +281,12 @@ impl Generator { } fn add_includes(&mut self) { - add_line!(self, "#include <tree_sitter/parser.h>"); + add_line!(self, "#include \"tree_sitter/parser.h\""); add_line!(self, ""); } fn add_pragmas(&mut self) { add_line!(self, "#if defined(__GNUC__) || defined(__clang__)"); - add_line!(self, "#pragma GCC diagnostic push"); add_line!( self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"" ); @@ -314,7 +340,7 @@ impl Generator { "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len()); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -336,19 +362,19 @@ impl Generator { } fn add_symbol_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_symbol_identifiers {{"); indent!(self); self.symbol_order.insert(Symbol::end(), 0); let mut i = 1; - for symbol in self.parse_table.symbols.iter() { + for symbol in &self.parse_table.symbols { if *symbol != Symbol::end() { self.symbol_order.insert(*symbol, i); - add_line!(self, "{} = {},", self.symbol_ids[&symbol], i); + add_line!(self, "{} = {},", self.symbol_ids[symbol], i); i += 1; } } for alias in &self.unique_aliases { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); + add_line!(self, "{} = {},", self.alias_ids[alias], i); i += 1; } dedent!(self); @@ -359,20 +385,21 @@ impl Generator { fn add_symbol_names_list(&mut self) { add_line!(self, "static const char * const ts_symbol_names[] = {{"); indent!(self); - for symbol in self.parse_table.symbols.iter() { + for symbol in &self.parse_table.symbols { let name = self.sanitize_string( self.default_aliases .get(symbol) - .map(|alias| alias.value.as_str()) - .unwrap_or(self.metadata_for_symbol(*symbol).0), + .map_or(self.metadata_for_symbol(*symbol).0, |alias| { + alias.value.as_str() + }), ); - add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); + add_line!(self, "[{}] = \"{}\",", self.symbol_ids[symbol], name); } for alias in &self.unique_aliases { add_line!( self, "[{}] = \"{}\",", - self.alias_ids[&alias], + self.alias_ids[alias], self.sanitize_string(&alias.value) ); } @@ -397,8 +424,8 @@ impl Generator { add_line!( self, "[{}] = {},", - self.alias_ids[&alias], - self.alias_ids[&alias], + self.alias_ids[alias], + self.alias_ids[alias], ); } @@ -408,7 +435,7 @@ impl Generator { } fn add_field_name_enum(&mut self) { -
add_line!(self, "enum {{"); + add_line!(self, "enum ts_field_identifiers {{"); indent!(self); for (i, field_name) in self.field_names.iter().enumerate() { add_line!(self, "{} = {},", self.field_id(field_name), i + 1); @@ -442,7 +469,7 @@ impl Generator { ); indent!(self); for symbol in &self.parse_table.symbols { - add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); + add_line!(self, "[{}] = {{", self.symbol_ids[symbol]); indent!(self); if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); @@ -474,7 +501,7 @@ impl Generator { add_line!(self, "}},"); } for alias in &self.unique_aliases { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + add_line!(self, "[{}] = {{", self.alias_ids[alias]); indent!(self); add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", alias.is_named); @@ -506,7 +533,7 @@ impl Generator { indent!(self); for (j, alias) in production_info.alias_sequence.iter().enumerate() { if let Some(alias) = alias { - add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); + add_line!(self, "[{}] = {},", j, self.alias_ids[alias]); } } dedent!(self); @@ -525,15 +552,13 @@ impl Generator { if let Some(alias) = &step.alias { if step.symbol.is_non_terminal() && Some(alias) != self.default_aliases.get(&step.symbol) + && self.symbol_ids.contains_key(&step.symbol) { - if self.symbol_ids.contains_key(&step.symbol) { - if let Some(alias_id) = self.alias_ids.get(&alias) { - let alias_ids = alias_ids_by_symbol - .entry(step.symbol) - .or_insert(Vec::new()); - if let Err(i) = alias_ids.binary_search(&alias_id) { - alias_ids.insert(i, alias_id); - } + if let Some(alias_id) = self.alias_ids.get(alias) { + let alias_ids = + alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); } } } @@ -552,12 +577,12 @@ impl Generator { indent!(self); for (symbol, alias_ids) in alias_ids_by_symbol { let symbol_id = &self.symbol_ids[symbol]; - let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; - add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); + let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]]; + add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len()); indent!(self); - add_line!(self, "{},", public_symbol_id); + add_line!(self, "{public_symbol_id},"); for alias_id in alias_ids { - add_line!(self, "{},", alias_id); + add_line!(self, "{alias_id},"); } dedent!(self); } @@ -583,7 +608,7 @@ impl Generator { let primary_state = first_state_for_each_core_id .entry(state.core_id) .or_insert(idx); - add_line!(self, "[{}] = {},", idx, primary_state); + add_line!(self, "[{idx}] = {primary_state},"); } dedent!(self); add_line!(self, "}};"); @@ -594,14 +619,16 @@ impl Generator { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; self.get_field_map_id( - &Vec::new(), + Vec::new(), &mut flat_field_maps, &mut next_flat_field_map_index, ); let mut field_map_ids = Vec::new(); for production_info in &self.parse_table.production_infos { - if !production_info.field_map.is_empty() { + if production_info.field_map.is_empty() { + field_map_ids.push((0, 0)); + } else { let mut flat_field_map = Vec::new(); for (field_name, locations) in &production_info.field_map { for location in locations { @@ -610,14 +637,12 @@ impl Generator { } field_map_ids.push(( self.get_field_map_id( - &flat_field_map, + flat_field_map.clone(), &mut flat_field_maps, &mut next_flat_field_map_index, ), flat_field_map.len(), 
)); - } else { - field_map_ids.push((0, 0)); } } @@ -630,10 +655,7 @@ impl Generator { if length > 0 { add_line!( self, - "[{}] = {{.index = {}, .length = {}}},", - production_id, - row_id, - length + "[{production_id}] = {{.index = {row_id}, .length = {length}}},", ); } } @@ -647,7 +669,7 @@ impl Generator { ); indent!(self); for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { - add_line!(self, "[{}] =", row_index); + add_line!(self, "[{row_index}] ="); indent!(self); for (field_name, location) in field_pairs { add_whitespace!(self); @@ -665,101 +687,10 @@ impl Generator { add_line!(self, ""); } - fn add_lex_function( - &mut self, - name: &str, - lex_table: LexTable, - extract_helper_functions: bool, - ) { - let mut ruled_out_chars = HashSet::new(); - let mut large_character_sets = Vec::<LargeCharacterSetInfo>::new(); - - // For each lex state, compute a summary of the code that needs to be - // generated. - let state_transition_summaries: Vec<Vec<TransitionSummary>> = lex_table - .states - .iter() - .map(|state| { - ruled_out_chars.clear(); - - // For each state transition, compute the set of character ranges - // that need to be checked. - state - .advance_actions - .iter() - .map(|(chars, action)| { - let is_included = !chars.contains(std::char::MAX); - let mut ranges; - if is_included { - ranges = chars.simplify_ignoring(&ruled_out_chars); - ruled_out_chars.extend(chars.iter()); - } else { - ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars); - ranges.insert(0, '\0'..'\0') - } - - // Record any large character sets so that they can be extracted - // into helper functions, reducing code duplication. - let mut call_id = None; - if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { - let char_set_symbol = self - .symbol_for_advance_action(action, &lex_table) - .expect("No symbol for lex state"); - let mut count_for_symbol = 0; - for (i, info) in large_character_sets.iter_mut().enumerate() { - if info.ranges == ranges { - call_id = Some(i); - break; - } - if info.symbol == char_set_symbol { - count_for_symbol += 1; - } - } - if call_id.is_none() { - call_id = Some(large_character_sets.len()); - large_character_sets.push(LargeCharacterSetInfo { - symbol: char_set_symbol, - index: count_for_symbol + 1, - ranges: ranges.clone(), - }); - } - } - - TransitionSummary { - is_included, - ranges, - call_id, - } - }) - .collect() - }) - .collect(); - - // Generate a helper function for each large character set.
- let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); - sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); - for info in sorted_large_char_sets { - add_line!( - self, - "static inline bool {}_character_set_{}(int32_t c) {{", - self.symbol_ids[&info.symbol], - info.index - ); - indent!(self); - add_whitespace!(self); - add!(self, "return "); - let tree = CharacterTree::from_ranges(&info.ranges); - self.add_character_tree(tree.as_ref()); - add!(self, ";\n"); - dedent!(self); - add_line!(self, "}}"); - add_line!(self, ""); - } - + fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { add_line!( self, - "static bool {}(TSLexer *lexer, TSStateId state) {{", - name + "static bool {name}(TSLexer *lexer, TSStateId state) {{", ); indent!(self); @@ -769,9 +700,9 @@ impl Generator { indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { - add_line!(self, "case {}:", i); + add_line!(self, "case {i}:"); indent!(self); - self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); + self.add_lex_state(i, state); dedent!(self); } @@ -788,35 +719,7 @@ impl Generator { add_line!(self, ""); } - fn symbol_for_advance_action( - &self, - action: &AdvanceAction, - lex_table: &LexTable, - ) -> Option<Symbol> { - let mut state_ids = vec![action.state]; - let mut i = 0; - while i < state_ids.len() { - let id = state_ids[i]; - let state = &lex_table.states[id]; - if let Some(accept) = state.accept_action { - return Some(accept); - } - for (_, action) in &state.advance_actions { - if !state_ids.contains(&action.state) { - state_ids.push(action.state); - } - } - i += 1; - } - return None; - } - - fn add_lex_state( - &mut self, - state: LexState, - transition_info: &Vec<TransitionSummary>, - large_character_sets: &Vec<LargeCharacterSetInfo>, - ) { + fn add_lex_state(&mut self, _state_ix: usize, state: LexState) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); } @@ -825,37 +728,176 @@ impl Generator { add_line!(self, "if (eof) ADVANCE({});", eof_action.state); } - for (i, (_, action)) in state.advance_actions.into_iter().enumerate() { - let transition = &transition_info[i]; + let mut chars_copy = CharacterSet::empty(); + let mut large_set = CharacterSet::empty(); + let mut ruled_out_chars = CharacterSet::empty(); + + // The transitions in a lex state are sorted with the single-character + // transitions first. If there are many single-character transitions, + // then implement them using an array of (lookahead character, state) + // pairs, instead of individual if statements, in order to reduce compile + // time.
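// Illustrative sketch of the effect on the generated C (the state numbers
// here are hypothetical): instead of a chain of checks like
//   if (lookahead == '+') ADVANCE(21);
//   if (lookahead == '-') ADVANCE(22);
// the generator can emit one table-driven dispatch:
//   ADVANCE_MAP(
//     '+', 21,
//     '-', 22,
//   );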
+ let mut leading_simple_transition_count = 0; + let mut leading_simple_transition_range_count = 0; + for (chars, action) in &state.advance_actions { + if action.in_main_token + && chars.ranges().all(|r| { + let start = *r.start() as u32; + let end = *r.end() as u32; + end <= start + 1 && end <= u16::MAX as u32 + }) + { + leading_simple_transition_count += 1; + leading_simple_transition_range_count += chars.range_count(); + } else { + break; + } + } + + if leading_simple_transition_range_count >= 8 { + add_line!(self, "ADVANCE_MAP("); + indent!(self); + for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] { + for range in chars.ranges() { + add_whitespace!(self); + self.add_character(*range.start()); + add!(self, ", {},\n", action.state); + if range.end() > range.start() { + add_whitespace!(self); + self.add_character(*range.end()); + add!(self, ", {},\n", action.state); + } + } + ruled_out_chars = ruled_out_chars.add(chars); + } + dedent!(self); + add_line!(self, ");"); + } else { + leading_simple_transition_count = 0; + } + + for (chars, action) in &state.advance_actions[leading_simple_transition_count..] { add_whitespace!(self); - // If there is a helper function for this transition's character - // set, then generate a call to that helper function. - if let Some(call_id) = transition.call_id { - let info = &large_character_sets[call_id]; + // The lex state's advance actions are represented with disjoint + // sets of characters. When translating these disjoint sets into a + // sequence of checks, we don't need to re-check conditions that + // have already been checked due to previous transitions. + // + // Note that this simplification may result in an empty character set. + // That means that the transition is guaranteed (nothing further needs to + // be checked), not that this transition is impossible. + let simplified_chars = chars.simplify_ignoring(&ruled_out_chars); + + // For large character sets, find the best matching character set from + // a pre-selected list of large character sets, which are based on the + // state transitions for individual tokens. This transition may not exactly + // match one of the pre-selected character sets. In that case, determine + // the additional checks that need to be performed to match this transition. + let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None; + if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT { + for (ix, (_, set)) in self.large_character_sets.iter().enumerate() { + chars_copy.assign(&simplified_chars); + large_set.assign(set); + let intersection = chars_copy.remove_intersection(&mut large_set); + if !intersection.is_empty() { + let additions = chars_copy.simplify_ignoring(&ruled_out_chars); + let removals = large_set.simplify_ignoring(&ruled_out_chars); + let total_range_count = additions.range_count() + removals.range_count(); + if total_range_count >= simplified_chars.range_count() { + continue; + } + if let Some((_, best_additions, best_removals)) = &best_large_char_set { + let best_range_count = + best_additions.range_count() + best_removals.range_count(); + if best_range_count < total_range_count { + continue; + } + } + best_large_char_set = Some((ix, additions, removals)); + } + } + } + + // Add this transition's character set to the set of ruled out characters, + // which don't need to be checked for subsequent transitions in this state.
+ ruled_out_chars = ruled_out_chars.add(chars); + + let mut large_char_set_ix = None; + let mut asserted_chars = simplified_chars; + let mut negated_chars = CharacterSet::empty(); + if let Some((char_set_ix, additions, removals)) = best_large_char_set { + asserted_chars = additions; + negated_chars = removals; + large_char_set_ix = Some(char_set_ix); + } + + let mut line_break = "\n".to_string(); + for _ in 0..self.indent_level + 2 { + line_break.push_str(" "); + } + + let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty(); + let has_negative_condition = !negated_chars.is_empty(); + let has_condition = has_positive_condition || has_negative_condition; + if has_condition { add!(self, "if ("); + if has_positive_condition && has_negative_condition { + add!(self, "("); + } + } + + if let Some(large_char_set_ix) = large_char_set_ix { + let large_set = &self.large_character_sets[large_char_set_ix].1; + + // If the character set contains the null character, check that we + // are not at the end of the file. + let check_eof = large_set.contains('\0'); + if check_eof { + add!(self, "(!eof && ") + } + + let char_set_info = &mut self.large_character_set_info[large_char_set_ix]; + char_set_info.is_used = true; + add!( + self, + "set_contains({}, {}, lookahead)", + &char_set_info.constant_name, + large_set.range_count(), ); + if check_eof { + add!(self, ")"); + } } - // Otherwise, generate code to compare the lookahead character - // with all of the character ranges. - if transition.ranges.len() > 0 { - add!(self, "if ("); - self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); + if !asserted_chars.is_empty() { + if large_char_set_ix.is_some() { + add!(self, " ||{line_break}"); + } + + // If the character set contains the max character, then it probably + // corresponds to a negated character class in a regex, so it will be more + // concise and readable to express it in terms of negated ranges.
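// For example (illustrative): a token defined by a regex like /[^"]+/ yields
// a character set that reaches char::MAX, so the generated condition becomes
//   lookahead != '"'
// rather than two enormous positive ranges on either side of '"'.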
+ let is_included = !asserted_chars.contains(char::MAX); + if !is_included { + asserted_chars = asserted_chars.negate().add_char('\0'); + } + + self.add_character_range_conditions(&asserted_chars, is_included, &line_break); + } + + if has_negative_condition { + if has_positive_condition { + add!(self, ") &&{line_break}"); + } + self.add_character_range_conditions(&negated_chars, false, &line_break); + } + + if has_condition { add!(self, ") "); } - self.add_advance_action(&action); + + self.add_advance_action(action); add!(self, "\n"); } @@ -864,128 +906,110 @@ impl Generator { fn add_character_range_conditions( &mut self, - ranges: &[Range<char>], + characters: &CharacterSet, is_included: bool, - indent_count: usize, + line_break: &str, ) { - let mut line_break = "\n".to_string(); - for _ in 0..self.indent_level + indent_count { - line_break.push_str(" "); - } - - for (i, range) in ranges.iter().enumerate() { + for (i, range) in characters.ranges().enumerate() { + let start = *range.start(); + let end = *range.end(); if is_included { if i > 0 { - add!(self, " ||{}", line_break); + add!(self, " ||{line_break}"); } - if range.end == range.start { + + if start == '\0' { + add!(self, "(!eof && "); + if end == '\0' { + add!(self, "lookahead == 0"); + } else { + add!(self, "lookahead <= "); + } + self.add_character(end); + add!(self, ")"); + continue; + } else if end == start { add!(self, "lookahead == "); - self.add_character(range.start); - } else if range.end as u32 == range.start as u32 + 1 { + self.add_character(start); + } else if end as u32 == start as u32 + 1 { add!(self, "lookahead == "); - self.add_character(range.start); - add!(self, " ||{}lookahead == ", line_break); - self.add_character(range.end); + self.add_character(start); + add!(self, " ||{line_break}lookahead == "); + self.add_character(end); } else { add!(self, "("); - self.add_character(range.start); + self.add_character(start); add!(self, " <= lookahead && lookahead <= "); - self.add_character(range.end); + self.add_character(end); add!(self, ")"); } } else { if i > 0 { - add!(self, " &&{}", line_break); + add!(self, " &&{line_break}"); } - if range.end == range.start { + if end == start { add!(self, "lookahead != "); - self.add_character(range.start); - } else if range.end as u32 == range.start as u32 + 1 { + self.add_character(start); + } else if end as u32 == start as u32 + 1 { add!(self, "lookahead != "); - self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); - self.add_character(range.end); + self.add_character(start); + add!(self, " &&{line_break}lookahead != "); + self.add_character(end); + } else if start != '\0' { + add!(self, "(lookahead < "); + self.add_character(start); + add!(self, " || "); + self.add_character(end); + add!(self, " < lookahead)"); } else { - if range.start != '\0' { - add!(self, "(lookahead < "); - self.add_character(range.start); - add!(self, " || "); - self.add_character(range.end); - add!(self, " < lookahead)"); - } else { - add!(self, "lookahead > "); - self.add_character(range.end); - } + add!(self, "lookahead > "); + self.add_character(end); } } } } - fn add_character_tree(&mut self, tree: Option<&CharacterTree>) { - match tree { - Some(CharacterTree::Compare { - value, - operator, - consequence, - alternative, - }) => { - let op = match operator { - Comparator::Less => "<", - Comparator::LessOrEqual => "<=", - Comparator::Equal => "==", - Comparator::GreaterOrEqual => ">=", - }; - let consequence = consequence.as_ref().map(Box::as_ref); - let alternative =
alternative.as_ref().map(Box::as_ref); - - let simple = alternative.is_none() && consequence == Some(&CharacterTree::Yes); - - if !simple { - add!(self, "("); - } - - add!(self, "c {} ", op); - self.add_character(*value); + fn add_character_set(&mut self, ix: usize) { + let characters = self.large_character_sets[ix].1.clone(); + let info = &self.large_character_set_info[ix]; + if !info.is_used { + return; + } - if !simple { - if alternative.is_none() { - add!(self, " && "); - self.add_character_tree(consequence); - } else if consequence == Some(&CharacterTree::Yes) { - add!(self, " || "); - self.add_character_tree(alternative); - } else { - add!(self, "\n"); - indent!(self); - add_whitespace!(self); - add!(self, "? "); - self.add_character_tree(consequence); - add!(self, "\n"); - add_whitespace!(self); - add!(self, ": "); - self.add_character_tree(alternative); - dedent!(self); - } - } + add_line!( + self, + "static TSCharacterRange {}[] = {{", + info.constant_name + ); - if !simple { - add!(self, ")"); + indent!(self); + for (ix, range) in characters.ranges().enumerate() { + let column = ix % 8; + if column == 0 { + if ix > 0 { + add!(self, "\n"); } + add_whitespace!(self); + } else { + add!(self, " "); } - Some(CharacterTree::Yes) => { - add!(self, "true"); - } - None => { - add!(self, "false"); - } + add!(self, "{{"); + self.add_character(*range.start()); + add!(self, ", "); + self.add_character(*range.end()); + add!(self, "}},"); } + add!(self, "\n"); + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); } fn add_advance_action(&mut self, action: &AdvanceAction) { if action.in_main_token { add!(self, "ADVANCE({});", action.state); } else { - add!(self, "SKIP({})", action.state); + add!(self, "SKIP({});", action.state); } } @@ -997,17 +1021,16 @@ impl Generator { indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { if state.is_end_of_non_terminal_extra() { - add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); + add_line!(self, "[{i}] = {{(TSStateId)(-1)}},"); } else if state.external_lex_state_id > 0 { add_line!( self, - "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", - i, + "[{i}] = {{.lex_state = {}, .external_lex_state = {}}},", state.lex_state_id, state.external_lex_state_id ); } else { - add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id); + add_line!(self, "[{i}] = {{.lex_state = {}}},", state.lex_state_id); } } dedent!(self); @@ -1016,7 +1039,7 @@ impl Generator { } fn add_external_token_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_external_scanner_symbol_identifiers {{"); indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { add_line!( @@ -1041,11 +1064,11 @@ impl Generator { let token = &self.syntax_grammar.external_tokens[i]; let id_token = token .corresponding_internal_token - .unwrap_or(Symbol::external(i)); + .unwrap_or_else(|| Symbol::external(i)); add_line!( self, "[{}] = {},", - self.external_token_id(&token), + self.external_token_id(token), self.symbol_ids[&id_token], ); } @@ -1140,12 +1163,7 @@ impl Generator { &mut parse_table_entries, &mut next_parse_action_list_index, ); - add_line!( - self, - "[{}] = ACTIONS({}),", - self.symbol_ids[symbol], - entry_id - ); + add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]); } dedent!(self); add_line!(self, "}},"); @@ -1160,7 +1178,7 @@ impl Generator { let mut index = 0; let mut small_state_indices = Vec::new(); - let mut symbols_by_value: HashMap<(usize, SymbolType), Vec> = HashMap::new(); + let 
mut symbols_by_value = HashMap::<(usize, SymbolType), Vec>::new(); for state in self.parse_table.states.iter().skip(self.large_state_count) { small_state_indices.push(index); symbols_by_value.clear(); @@ -1201,14 +1219,14 @@ impl Generator { (symbols.len(), *kind, *value, symbols[0]) }); - add_line!(self, "[{}] = {},", index, values_with_symbols.len()); + add_line!(self, "[{index}] = {},", values_with_symbols.len()); indent!(self); - for ((value, kind), symbols) in values_with_symbols.iter_mut() { + for ((value, kind), symbols) in &mut values_with_symbols { if *kind == SymbolType::NonTerminal { - add_line!(self, "STATE({}), {},", value, symbols.len()); + add_line!(self, "STATE({value}), {},", symbols.len()); } else { - add_line!(self, "ACTIONS({}), {},", value, symbols.len()); + add_line!(self, "ACTIONS({value}), {},", symbols.len()); } symbols.sort_unstable(); @@ -1239,8 +1257,7 @@ impl Generator { for i in self.large_state_count..self.parse_table.states.len() { add_line!( self, - "[SMALL_STATE({})] = {},", - i, + "[SMALL_STATE({i})] = {},", small_state_indices[i - self.large_state_count] ); } @@ -1249,10 +1266,10 @@ impl Generator { add_line!(self, ""); } - let mut parse_table_entries: Vec<_> = parse_table_entries + let mut parse_table_entries = parse_table_entries .into_iter() .map(|(entry, i)| (i, entry)) - .collect(); + .collect::>(); parse_table_entries.sort_by_key(|(index, _)| *index); self.add_parse_action_list(parse_table_entries); } @@ -1266,8 +1283,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", - i, + " [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},", entry.actions.len(), entry.reusable ); @@ -1282,9 +1298,9 @@ impl Generator { is_repetition, } => { if is_repetition { - add!(self, "SHIFT_REPEAT({})", state); + add!(self, "SHIFT_REPEAT({state})"); } else { - add!(self, "SHIFT({})", state); + add!(self, "SHIFT({state})"); } } ParseAction::Reduce { @@ -1294,17 +1310,14 @@ impl Generator { production_id, .. 
} => { - add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); - if dynamic_precedence != 0 { - add!(self, ", .dynamic_precedence = {}", dynamic_precedence); - } - if production_id != 0 { - add!(self, ", .production_id = {}", production_id); - } - add!(self, ")"); + add!( + self, + "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})", + self.symbol_ids[&symbol] + ); } } - add!(self, ",") + add!(self, ","); } add!(self, "\n"); } @@ -1315,42 +1328,45 @@ impl Generator { fn add_parser_export(&mut self) { let language_function_name = format!("tree_sitter_{}", self.language_name); - let external_scanner_name = format!("{}_external_scanner", language_function_name); + let external_scanner_name = format!("{language_function_name}_external_scanner"); add_line!(self, "#ifdef __cplusplus"); add_line!(self, r#"extern "C" {{"#); add_line!(self, "#endif"); if !self.syntax_grammar.external_tokens.is_empty() { - add_line!(self, "void *{}_create(void);", external_scanner_name); - add_line!(self, "void {}_destroy(void *);", external_scanner_name); + add_line!(self, "void *{external_scanner_name}_create(void);"); + add_line!(self, "void {external_scanner_name}_destroy(void *);"); add_line!( self, - "bool {}_scan(void *, TSLexer *, const bool *);", - external_scanner_name + "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);", ); add_line!( self, - "unsigned {}_serialize(void *, char *);", - external_scanner_name + "unsigned {external_scanner_name}_serialize(void *, char *);", ); add_line!( self, - "void {}_deserialize(void *, const char *, unsigned);", - external_scanner_name + "void {external_scanner_name}_deserialize(void *, const char *, unsigned);", ); add_line!(self, ""); } - add_line!(self, "#ifdef _WIN32"); - add_line!(self, "#define extern __declspec(dllexport)"); + add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS"); + add_line!(self, "#define TS_PUBLIC"); + add_line!(self, "#elif defined(_WIN32)"); + add_line!(self, "#define TS_PUBLIC __declspec(dllexport)"); + add_line!(self, "#else"); + add_line!( + self, + "#define TS_PUBLIC __attribute__((visibility(\"default\")))" + ); add_line!(self, "#endif"); add_line!(self, ""); add_line!( self, - "extern const TSLanguage *{}(void) {{", - language_function_name + "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{", ); indent!(self); add_line!(self, "static const TSLanguage language = {{"); @@ -1410,11 +1426,11 @@ impl Generator { indent!(self); add_line!(self, "&ts_external_scanner_states[0][0],"); add_line!(self, "ts_external_scanner_symbol_map,"); - add_line!(self, "{}_create,", external_scanner_name); - add_line!(self, "{}_destroy,", external_scanner_name); - add_line!(self, "{}_scan,", external_scanner_name); - add_line!(self, "{}_serialize,", external_scanner_name); - add_line!(self, "{}_deserialize,", external_scanner_name); + add_line!(self, "{external_scanner_name}_create,"); + add_line!(self, "{external_scanner_name}_destroy,"); + add_line!(self, "{external_scanner_name}_scan,"); + add_line!(self, "{external_scanner_name}_serialize,"); + add_line!(self, "{external_scanner_name}_deserialize,"); dedent!(self); add_line!(self, "}},"); } @@ -1451,7 +1467,7 @@ impl Generator { fn get_field_map_id( &self, - flat_field_map: &Vec<(String, FieldLocation)>, + flat_field_map: Vec<(String, FieldLocation)>, flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>, next_flat_field_map_index: &mut usize, ) -> usize { @@ -1460,8 +1476,8 @@ impl Generator { } let result = *next_flat_field_map_index; 
- flat_field_maps.push((result, flat_field_map.clone())); *next_flat_field_map_index += flat_field_map.len(); + flat_field_maps.push((result, flat_field_map)); result } @@ -1500,8 +1516,8 @@ impl Generator { self.symbol_ids.insert(symbol, id); } - fn field_id(&self, field_name: &String) -> String { - format!("field_{}", field_name) + fn field_id(&self, field_name: &str) -> String { + format!("field_{field_name}") } fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { @@ -1525,54 +1541,93 @@ impl Generator { fn sanitize_identifier(&self, name: &str) -> String { let mut result = String::with_capacity(name.len()); for c in name.chars() { - if ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - { + if c.is_ascii_alphanumeric() || c == '_' { result.push(c); } else { - let replacement = match c { - '~' => "TILDE", - '`' => "BQUOTE", - '!' => "BANG", - '@' => "AT", - '#' => "POUND", - '$' => "DOLLAR", - '%' => "PERCENT", - '^' => "CARET", - '&' => "AMP", - '*' => "STAR", - '(' => "LPAREN", - ')' => "RPAREN", - '-' => "DASH", - '+' => "PLUS", - '=' => "EQ", - '{' => "LBRACE", - '}' => "RBRACE", - '[' => "LBRACK", - ']' => "RBRACK", - '\\' => "BSLASH", - '|' => "PIPE", - ':' => "COLON", - ';' => "SEMI", - '"' => "DQUOTE", - '\'' => "SQUOTE", - '<' => "LT", - '>' => "GT", - ',' => "COMMA", - '.' => "DOT", - '?' => "QMARK", - '/' => "SLASH", - '\n' => "LF", - '\r' => "CR", - '\t' => "TAB", - _ => continue, - }; - if !result.is_empty() && !result.ends_with("_") { - result.push('_'); + 'special_chars: { + let replacement = match c { + ' ' if name.len() == 1 => "SPACE", + '~' => "TILDE", + '`' => "BQUOTE", + '!' => "BANG", + '@' => "AT", + '#' => "POUND", + '$' => "DOLLAR", + '%' => "PERCENT", + '^' => "CARET", + '&' => "AMP", + '*' => "STAR", + '(' => "LPAREN", + ')' => "RPAREN", + '-' => "DASH", + '+' => "PLUS", + '=' => "EQ", + '{' => "LBRACE", + '}' => "RBRACE", + '[' => "LBRACK", + ']' => "RBRACK", + '\\' => "BSLASH", + '|' => "PIPE", + ':' => "COLON", + ';' => "SEMI", + '"' => "DQUOTE", + '\'' => "SQUOTE", + '<' => "LT", + '>' => "GT", + ',' => "COMMA", + '.' => "DOT", + '?' => "QMARK", + '/' => "SLASH", + '\n' => "LF", + '\r' => "CR", + '\t' => "TAB", + '\0' => "NULL", + '\u{0001}' => "SOH", + '\u{0002}' => "STX", + '\u{0003}' => "ETX", + '\u{0004}' => "EOT", + '\u{0005}' => "ENQ", + '\u{0006}' => "ACK", + '\u{0007}' => "BEL", + '\u{0008}' => "BS", + '\u{000b}' => "VTAB", + '\u{000c}' => "FF", + '\u{000e}' => "SO", + '\u{000f}' => "SI", + '\u{0010}' => "DLE", + '\u{0011}' => "DC1", + '\u{0012}' => "DC2", + '\u{0013}' => "DC3", + '\u{0014}' => "DC4", + '\u{0015}' => "NAK", + '\u{0016}' => "SYN", + '\u{0017}' => "ETB", + '\u{0018}' => "CAN", + '\u{0019}' => "EM", + '\u{001a}' => "SUB", + '\u{001b}' => "ESC", + '\u{001c}' => "FS", + '\u{001d}' => "GS", + '\u{001e}' => "RS", + '\u{001f}' => "US", + '\u{007F}' => "DEL", + '\u{FEFF}' => "BOM", + '\u{0080}'..='\u{FFFF}' => { + result.push_str(&format!("u{:04x}", c as u32)); + break 'special_chars; + } + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("U{:08x}", c as u32)); + break 'special_chars; + } + '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(), + ' ' => break 'special_chars, + }; + if !result.is_empty() && !result.ends_with('_') { + result.push('_'); + } + result += replacement; } - result += replacement; } } result @@ -1585,10 +1640,19 @@ impl Generator { '\"' => result += "\\\"", '?' 
=> result += "\\?", '\\' => result += "\\\\", + '\u{0007}' => result += "\\a", + '\u{0008}' => result += "\\b", + '\u{000b}' => result += "\\v", '\u{000c}' => result += "\\f", '\n' => result += "\\n", '\r' => result += "\\r", '\t' => result += "\\t", + '\0' => result += "\\0", + '\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32), + '\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32), + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("\\U{:08x}", c as u32)); + } _ => result.push(c), } } @@ -1604,10 +1668,12 @@ impl Generator { '\t' => add!(self, "'\\t'"), '\r' => add!(self, "'\\r'"), _ => { - if c == ' ' || c.is_ascii_graphic() { - add!(self, "'{}'", c) + if c == '\0' { + add!(self, "0") + } else if c == ' ' || c.is_ascii_graphic() { + add!(self, "'{c}'"); } else { - add!(self, "{}", c as u32) + add!(self, "0x{:02x}", c as u32); } } } @@ -1622,43 +1688,40 @@ impl Generator { /// * `parse_table` - The generated parse table for the language /// * `main_lex_table` - The generated lexing table for the language /// * `keyword_lex_table` - The generated keyword lexing table for the language -/// * `keyword_capture_token` - A symbol indicating which token is used -/// for keyword capture, if any. +/// * `keyword_capture_token` - A symbol indicating which token is used for keyword capture, if any. /// * `syntax_grammar` - The syntax grammar extracted from the language's grammar /// * `lexical_grammar` - The lexical grammar extracted from the language's grammar -/// * `default_aliases` - A map describing the global rename rules that should apply. -/// the keys are symbols that are *always* aliased in the same way, and the values -/// are the aliases that are applied to those symbols. -/// * `abi_version` - The language ABI version that should be generated. Usually -/// you want Tree-sitter's current version, but right after making an ABI -/// change, it may be useful to generate code with the previous ABI. -pub(crate) fn render_c_code( +/// * `default_aliases` - A map describing the global rename rules that should apply. the keys are +/// symbols that are *always* aliased in the same way, and the values are the aliases that are +/// applied to those symbols. +/// * `abi_version` - The language ABI version that should be generated. Usually you want +/// Tree-sitter's current version, but right after making an ABI change, it may be useful to +/// generate code with the previous ABI. 
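With the tables now bundled into a single struct, the entry point takes six arguments instead of nine; a hypothetical call site for the new signature below (the real wiring lives in generate/mod.rs and may differ in detail):

```rust
// Hypothetical call site; `tables` bundles the parse table, both lex
// tables, the word token, and the large character sets.
let c_source: String = render_c_code(
    "my_language",
    tables,
    syntax_grammar,
    lexical_grammar,
    default_aliases,
    tree_sitter::LANGUAGE_VERSION, // or a pinned ABI such as 14
);
```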
+#[allow(clippy::too_many_arguments)] +pub fn render_c_code( name: &str, - parse_table: ParseTable, - main_lex_table: LexTable, - keyword_lex_table: LexTable, - keyword_capture_token: Option, + tables: Tables, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, default_aliases: AliasMap, abi_version: usize, ) -> String { - if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) { - panic!( - "This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}", - ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version - ); - } + assert!( + (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version), + "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}", + ); Generator { buffer: String::new(), indent_level: 0, language_name: name.to_string(), large_state_count: 0, - parse_table, - main_lex_table, - keyword_lex_table, - keyword_capture_token, + parse_table: tables.parse_table, + main_lex_table: tables.main_lex_table, + keyword_lex_table: tables.keyword_lex_table, + keyword_capture_token: tables.word_token, + large_character_sets: tables.large_character_sets, + large_character_set_info: Vec::new(), syntax_grammar, lexical_grammar, default_aliases, diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 0e3ff89..ab74a14 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -1,10 +1,11 @@ -use super::grammars::VariableType; -use smallbitvec::SmallBitVec; -use std::iter::FromIterator; use std::{collections::HashMap, fmt}; +use smallbitvec::SmallBitVec; + +use super::grammars::VariableType; + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum SymbolType { +pub enum SymbolType { External, End, EndOfNonTerminalExtra, @@ -13,28 +14,29 @@ pub(crate) enum SymbolType { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum Associativity { +pub enum Associativity { Left, Right, } #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct Alias { +pub struct Alias { pub value: String, pub is_named: bool, } -#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] pub enum Precedence { + #[default] None, Integer(i32), Name(String), } -pub(crate) type AliasMap = HashMap; +pub type AliasMap = HashMap; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] -pub(crate) struct MetadataParams { +pub struct MetadataParams { pub precedence: Precedence, pub dynamic_precedence: i32, pub associativity: Option, @@ -47,16 +49,16 @@ pub(crate) struct MetadataParams { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct Symbol { +pub struct Symbol { pub kind: SymbolType, pub index: usize, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub(crate) enum Rule { +pub enum Rule { Blank, String(String), - Pattern(String), + Pattern(String, String), NamedSymbol(String), Symbol(Symbol), Choice(Vec), @@ -73,7 +75,7 @@ pub(crate) enum Rule { // index corresponding to a token, and each value representing whether or not // the token is present in the set. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct TokenSet { +pub struct TokenSet { terminal_bits: SmallBitVec, external_bits: SmallBitVec, eof: bool, @@ -81,76 +83,77 @@ pub(crate) struct TokenSet { } impl Rule { - pub fn field(name: String, content: Rule) -> Self { + pub fn field(name: String, content: Self) -> Self { add_metadata(content, move |params| { params.field_name = Some(name); }) } - pub fn alias(content: Rule, value: String, is_named: bool) -> Self { + pub fn alias(content: Self, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { - params.alias = Some(Alias { is_named, value }); + params.alias = Some(Alias { value, is_named }); }) } - pub fn token(content: Rule) -> Self { + pub fn token(content: Self) -> Self { add_metadata(content, |params| { params.is_token = true; }) } - pub fn immediate_token(content: Rule) -> Self { + pub fn immediate_token(content: Self) -> Self { add_metadata(content, |params| { params.is_token = true; params.is_main_token = true; }) } - pub fn prec(value: Precedence, content: Rule) -> Self { + pub fn prec(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.precedence = value; }) } - pub fn prec_left(value: Precedence, content: Rule) -> Self { + pub fn prec_left(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Left); params.precedence = value; }) } - pub fn prec_right(value: Precedence, content: Rule) -> Self { + pub fn prec_right(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Right); params.precedence = value; }) } - pub fn prec_dynamic(value: i32, content: Rule) -> Self { + pub fn prec_dynamic(value: i32, content: Self) -> Self { add_metadata(content, |params| { params.dynamic_precedence = value; }) } - pub fn repeat(rule: Rule) -> Self { - Rule::Repeat(Box::new(rule)) + pub fn repeat(rule: Self) -> Self { + Self::Repeat(Box::new(rule)) } - pub fn choice(rules: Vec) -> Self { + pub fn choice(rules: Vec) -> Self { let mut elements = Vec::with_capacity(rules.len()); for rule in rules { choice_helper(&mut elements, rule); } - Rule::Choice(elements) + Self::Choice(elements) } - pub fn seq(rules: Vec) -> Self { - Rule::Seq(rules) + pub fn seq(rules: Vec) -> Self { + Self::Seq(rules) } } impl Alias { - pub fn kind(&self) -> VariableType { + #[must_use] + pub const fn kind(&self) -> VariableType { if self.is_named { VariableType::Named } else { @@ -160,85 +163,101 @@ impl Alias { } impl Precedence { - pub fn is_none(&self) -> bool { - matches!(self, Precedence::None) + #[must_use] + pub const fn is_none(&self) -> bool { + matches!(self, Self::None) } } #[cfg(test)] impl Rule { - pub fn terminal(index: usize) -> Self { - Rule::Symbol(Symbol::terminal(index)) + #[must_use] + pub const fn terminal(index: usize) -> Self { + Self::Symbol(Symbol::terminal(index)) } - pub fn non_terminal(index: usize) -> Self { - Rule::Symbol(Symbol::non_terminal(index)) + #[must_use] + pub const fn non_terminal(index: usize) -> Self { + Self::Symbol(Symbol::non_terminal(index)) } - pub fn external(index: usize) -> Self { - Rule::Symbol(Symbol::external(index)) + #[must_use] + pub const fn external(index: usize) -> Self { + Self::Symbol(Symbol::external(index)) } + #[must_use] pub fn named(name: &'static str) -> Self { - Rule::NamedSymbol(name.to_string()) + Self::NamedSymbol(name.to_string()) } + #[must_use] pub fn string(value: &'static str) -> Self { - 
Rule::String(value.to_string()) + Self::String(value.to_string()) } - pub fn pattern(value: &'static str) -> Self { - Rule::Pattern(value.to_string()) + #[must_use] + pub fn pattern(value: &'static str, flags: &'static str) -> Self { + Self::Pattern(value.to_string(), flags.to_string()) } } impl Symbol { + #[must_use] pub fn is_terminal(&self) -> bool { self.kind == SymbolType::Terminal } + #[must_use] pub fn is_non_terminal(&self) -> bool { self.kind == SymbolType::NonTerminal } + #[must_use] pub fn is_external(&self) -> bool { self.kind == SymbolType::External } + #[must_use] pub fn is_eof(&self) -> bool { self.kind == SymbolType::End } - pub fn non_terminal(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn non_terminal(index: usize) -> Self { + Self { kind: SymbolType::NonTerminal, index, } } - pub fn terminal(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn terminal(index: usize) -> Self { + Self { kind: SymbolType::Terminal, index, } } - pub fn external(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn external(index: usize) -> Self { + Self { kind: SymbolType::External, index, } } - pub fn end() -> Self { - Symbol { + #[must_use] + pub const fn end() -> Self { + Self { kind: SymbolType::End, index: 0, } } - pub fn end_of_nonterminal_extra() -> Self { - Symbol { + #[must_use] + pub const fn end_of_nonterminal_extra() -> Self { + Self { kind: SymbolType::EndOfNonTerminalExtra, index: 0, } @@ -246,8 +265,9 @@ impl Symbol { } impl From for Rule { + #[must_use] fn from(symbol: Symbol) -> Self { - Rule::Symbol(symbol) + Self::Symbol(symbol) } } @@ -261,7 +281,7 @@ impl TokenSet { } } - pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + pub fn iter(&self) -> impl Iterator + '_ { self.terminal_bits .iter() .enumerate() @@ -292,7 +312,7 @@ impl TokenSet { }) } - pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { + pub fn terminals(&self) -> impl Iterator + '_ { self.terminal_bits .iter() .enumerate() @@ -361,11 +381,9 @@ impl TokenSet { }; } }; - if other.index < vec.len() { - if vec[other.index] { - vec.set(other.index, false); - return true; - } + if other.index < vec.len() && vec[other.index] { + vec.set(other.index, false); + return true; } false } @@ -377,7 +395,7 @@ impl TokenSet { && !self.external_bits.iter().any(|a| a) } - pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { + pub fn insert_all_terminals(&mut self, other: &Self) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { self.terminal_bits.resize(other.terminal_bits.len(), false); @@ -391,7 +409,7 @@ impl TokenSet { result } - fn insert_all_externals(&mut self, other: &TokenSet) -> bool { + fn insert_all_externals(&mut self, other: &Self) -> bool { let mut result = false; if other.external_bits.len() > self.external_bits.len() { self.external_bits.resize(other.external_bits.len(), false); @@ -405,7 +423,7 @@ impl TokenSet { result } - pub fn insert_all(&mut self, other: &TokenSet) -> bool { + pub fn insert_all(&mut self, other: &Self) -> bool { let mut result = false; if other.eof { result |= !self.eof; @@ -466,15 +484,9 @@ fn choice_helper(result: &mut Vec, rule: Rule) { impl fmt::Display for Precedence { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Precedence::Integer(i) => write!(f, "{}", i), - Precedence::Name(s) => write!(f, "'{}'", s), - Precedence::None => write!(f, "none"), + Self::Integer(i) => write!(f, "{i}"), + Self::Name(s) => write!(f, "'{s}'"), + Self::None => write!(f, "none"), } 
} } - -impl Default for Precedence { - fn default() -> Self { - Precedence::None - } -} diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 16bf185..541a301 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,9 +1,12 @@ -use super::nfa::CharacterSet; -use super::rules::{Alias, Symbol, TokenSet}; use std::collections::BTreeMap; -pub(crate) type ProductionInfoId = usize; -pub(crate) type ParseStateId = usize; -pub(crate) type LexStateId = usize; + +use super::{ + nfa::CharacterSet, + rules::{Alias, Symbol, TokenSet}, +}; +pub type ProductionInfoId = usize; +pub type ParseStateId = usize; +pub type LexStateId = usize; use std::hash::BuildHasherDefault; @@ -11,7 +14,7 @@ use indexmap::IndexMap; use rustc_hash::FxHasher; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ParseAction { +pub enum ParseAction { Accept, Shift { state: ParseStateId, @@ -28,19 +31,19 @@ pub(crate) enum ParseAction { } #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum GotoAction { +pub enum GotoAction { Goto(ParseStateId), ShiftExtra, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub(crate) struct ParseTableEntry { +pub struct ParseTableEntry { pub actions: Vec, pub reusable: bool, } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct ParseState { +pub struct ParseState { pub id: ParseStateId, pub terminal_entries: IndexMap>, pub nonterminal_entries: IndexMap>, @@ -50,19 +53,19 @@ pub(crate) struct ParseState { } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] -pub(crate) struct FieldLocation { +pub struct FieldLocation { pub index: usize, pub inherited: bool, } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct ProductionInfo { +pub struct ProductionInfo { pub alias_sequence: Vec>, pub field_map: BTreeMap>, } #[derive(Debug, PartialEq, Eq)] -pub(crate) struct ParseTable { +pub struct ParseTable { pub states: Vec, pub symbols: Vec, pub production_infos: Vec, @@ -71,25 +74,25 @@ pub(crate) struct ParseTable { } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct AdvanceAction { +pub struct AdvanceAction { pub state: LexStateId, pub in_main_token: bool, } #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct LexState { +pub struct LexState { pub accept_action: Option, pub eof_action: Option, pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, } -#[derive(Debug, PartialEq, Eq)] -pub(crate) struct LexTable { +#[derive(Debug, PartialEq, Eq, Default)] +pub struct LexTable { pub states: Vec, } impl ParseTableEntry { - pub fn new() -> Self { + pub const fn new() -> Self { Self { reusable: true, actions: Vec::new(), @@ -97,19 +100,13 @@ impl ParseTableEntry { } } -impl Default for LexTable { - fn default() -> Self { - LexTable { states: Vec::new() } - } -} - impl ParseState { pub fn is_end_of_non_terminal_extra(&self) -> bool { self.terminal_entries .contains_key(&Symbol::end_of_nonterminal_extra()) } - pub fn referenced_states<'a>(&'a self) -> impl Iterator + 'a { + pub fn referenced_states(&self) -> impl Iterator + '_ { self.terminal_entries .iter() .flat_map(|(_, entry)| { @@ -129,7 +126,7 @@ impl ParseState { pub fn update_referenced_states(&mut self, mut f: F) where - F: FnMut(usize, &ParseState) -> usize, + F: FnMut(usize, &Self) -> usize, { let mut updates = Vec::new(); for (symbol, entry) in &self.terminal_entries { diff --git a/cli/src/generate/templates/.editorconfig b/cli/src/generate/templates/.editorconfig new file mode 100644 
index 0000000..d3a8b5b --- /dev/null +++ b/cli/src/generate/templates/.editorconfig @@ -0,0 +1,39 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{json,toml,yml,gyp}] +indent_style = space +indent_size = 2 + +[*.js] +indent_style = space +indent_size = 2 + +[*.rs] +indent_style = space +indent_size = 4 + +[*.{c,cc,h}] +indent_style = space +indent_size = 4 + +[*.{py,pyi}] +indent_style = space +indent_size = 4 + +[*.swift] +indent_style = space +indent_size = 4 + +[*.go] +indent_style = tab +indent_size = 8 + +[Makefile] +indent_style = tab +indent_size = 8 diff --git a/cli/src/generate/templates/PARSER_NAME.h b/cli/src/generate/templates/PARSER_NAME.h new file mode 100644 index 0000000..3dbbfd1 --- /dev/null +++ b/cli/src/generate/templates/PARSER_NAME.h @@ -0,0 +1,16 @@ +#ifndef TREE_SITTER_UPPER_PARSER_NAME_H_ +#define TREE_SITTER_UPPER_PARSER_NAME_H_ + +typedef struct TSLanguage TSLanguage; + +#ifdef __cplusplus +extern "C" { +#endif + +const TSLanguage *tree_sitter_PARSER_NAME(void); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_UPPER_PARSER_NAME_H_ diff --git a/cli/src/generate/templates/PARSER_NAME.pc.in b/cli/src/generate/templates/PARSER_NAME.pc.in new file mode 100644 index 0000000..deed9fa --- /dev/null +++ b/cli/src/generate/templates/PARSER_NAME.pc.in @@ -0,0 +1,11 @@ +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: tree-sitter-PARSER_NAME +Description: CAMEL_PARSER_NAME grammar for tree-sitter +URL: @URL@ +Version: @VERSION@ +Requires: @REQUIRES@ +Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-PARSER_NAME +Cflags: -I${includedir} diff --git a/cli/src/generate/templates/Package.swift b/cli/src/generate/templates/Package.swift new file mode 100644 index 0000000..dfa0c96 --- /dev/null +++ b/cli/src/generate/templates/Package.swift @@ -0,0 +1,47 @@ +// swift-tools-version:5.3 +import PackageDescription + +let package = Package( + name: "TreeSitterCAMEL_PARSER_NAME", + products: [ + .library(name: "TreeSitterCAMEL_PARSER_NAME", targets: ["TreeSitterCAMEL_PARSER_NAME"]), + ], + dependencies: [], + targets: [ + .target(name: "TreeSitterCAMEL_PARSER_NAME", + path: ".", + exclude: [ + "Cargo.toml", + "Makefile", + "binding.gyp", + "bindings/c", + "bindings/go", + "bindings/node", + "bindings/python", + "bindings/rust", + "prebuilds", + "grammar.js", + "package.json", + "package-lock.json", + "pyproject.toml", + "setup.py", + "test", + "examples", + ".editorconfig", + ".github", + ".gitignore", + ".gitattributes", + ".gitmodules", + ], + sources: [ + "src/parser.c", + // NOTE: if your language has an external scanner, add it here. + ], + resources: [ + .copy("queries") + ], + publicHeadersPath: "bindings/swift", + cSettings: [.headerSearchPath("src")]) + ], + cLanguageStandard: .c11 +) diff --git a/cli/src/generate/templates/__init__.py b/cli/src/generate/templates/__init__.py new file mode 100644 index 0000000..d3796cc --- /dev/null +++ b/cli/src/generate/templates/__init__.py @@ -0,0 +1,5 @@ +"CAMEL_PARSER_NAME grammar for tree-sitter" + +from ._binding import language + +__all__ = ["language"] diff --git a/cli/src/generate/templates/__init__.pyi b/cli/src/generate/templates/__init__.pyi new file mode 100644 index 0000000..5416666 --- /dev/null +++ b/cli/src/generate/templates/__init__.pyi @@ -0,0 +1 @@ +def language() -> int: ... 
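All of the new templates share one placeholder family: PARSER_NAME plus its cased variants (TreeSitterCAMEL_PARSER_NAME in Package.swift, TREE_SITTER_UPPER_PARSER_NAME_H_ in the header, tree_sitter_LOWER_PARSER_NAME in the Python binding). Instantiation is plain textual substitution in grammar_files.rs; a simplified sketch (the helper below is illustrative, not the real API):

```rust
// Simplified sketch of template instantiation. The longer placeholders
// must be replaced first, because plain PARSER_NAME is a substring of
// every cased variant.
fn instantiate(template: &str, name: &str) -> String {
    let camel: String = name
        .split('_')
        .map(|word| {
            let mut chars = word.chars();
            chars.next().map_or_else(String::new, |c| {
                c.to_uppercase().collect::<String>() + chars.as_str()
            })
        })
        .collect();
    template
        .replace("CAMEL_PARSER_NAME", &camel)
        .replace("UPPER_PARSER_NAME", &name.to_uppercase())
        .replace("LOWER_PARSER_NAME", &name.to_lowercase())
        .replace("PARSER_NAME", name)
}
```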
diff --git a/cli/src/generate/templates/alloc.h b/cli/src/generate/templates/alloc.h new file mode 100644 index 0000000..1f4466d --- /dev/null +++ b/cli/src/generate/templates/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/cli/src/generate/templates/binding.cc b/cli/src/generate/templates/binding.cc deleted file mode 100644 index d68a85a..0000000 --- a/cli/src/generate/templates/binding.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include "tree_sitter/parser.h" -#include -#include "nan.h" - -using namespace v8; - -extern "C" TSLanguage * tree_sitter_PARSER_NAME(); - -namespace { - -NAN_METHOD(New) {} - -void Init(Local exports, Local module) { - Local tpl = Nan::New(New); - tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - tpl->InstanceTemplate()->SetInternalFieldCount(1); - - Local constructor = Nan::GetFunction(tpl).ToLocalChecked(); - Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(instance, 0, tree_sitter_PARSER_NAME()); - - Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("PARSER_NAME").ToLocalChecked()); - Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); -} - -NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init) - -} // namespace diff --git a/cli/src/generate/templates/binding.go b/cli/src/generate/templates/binding.go new file mode 100644 index 0000000..b41863c --- /dev/null +++ b/cli/src/generate/templates/binding.go @@ -0,0 +1,13 @@ +package tree_sitter_PARSER_NAME + +// #cgo CFLAGS: -std=c11 -fPIC +// #include "../../src/parser.c" +// // NOTE: if your language has an external scanner, add it here. +import "C" + +import "unsafe" + +// Get the tree-sitter Language for this grammar. 
+func Language() unsafe.Pointer { + return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME()) +} diff --git a/cli/src/generate/templates/binding.gyp b/cli/src/generate/templates/binding.gyp index ba86afb..9b7feeb 100644 --- a/cli/src/generate/templates/binding.gyp +++ b/cli/src/generate/templates/binding.gyp @@ -2,18 +2,29 @@ "targets": [ { "target_name": "tree_sitter_PARSER_NAME_binding", + "dependencies": [ + "=RUST_BINDING_VERSION" [build-dependencies] -cc = "1.0" +cc = "1.0.87" diff --git a/cli/src/generate/templates/gitattributes b/cli/src/generate/templates/gitattributes new file mode 100644 index 0000000..ffb52ab --- /dev/null +++ b/cli/src/generate/templates/gitattributes @@ -0,0 +1,11 @@ +* text eol=lf + +src/*.json linguist-generated +src/parser.c linguist-generated +src/tree_sitter/* linguist-generated + +bindings/** linguist-generated +binding.gyp linguist-generated +setup.py linguist-generated +Makefile linguist-generated +Package.swift linguist-generated diff --git a/cli/src/generate/templates/gitignore b/cli/src/generate/templates/gitignore new file mode 100644 index 0000000..27fc43f --- /dev/null +++ b/cli/src/generate/templates/gitignore @@ -0,0 +1,38 @@ +# Rust artifacts +Cargo.lock +target/ + +# Node artifacts +build/ +prebuilds/ +node_modules/ +*.tgz + +# Swift artifacts +.build/ + +# Go artifacts +go.sum +_obj/ + +# Python artifacts +.venv/ +dist/ +*.egg-info +*.whl + +# C artifacts +*.a +*.so +*.so.* +*.dylib +*.dll +*.pc + +# Example dirs +/examples/*/ + +# Grammar volatiles +*.wasm +*.obj +*.o diff --git a/cli/src/generate/templates/go.mod b/cli/src/generate/templates/go.mod new file mode 100644 index 0000000..00e31a4 --- /dev/null +++ b/cli/src/generate/templates/go.mod @@ -0,0 +1,5 @@ +module github.com/tree-sitter/tree-sitter-PARSER_NAME + +go 1.22 + +require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8 diff --git a/cli/src/generate/templates/grammar.js b/cli/src/generate/templates/grammar.js new file mode 100644 index 0000000..62b7cf3 --- /dev/null +++ b/cli/src/generate/templates/grammar.js @@ -0,0 +1,11 @@ +/// +// @ts-check + +module.exports = grammar({ + name: "LOWER_PARSER_NAME", + + rules: { + // TODO: add the actual grammar rules + source_file: $ => "hello" + } +}); diff --git a/cli/src/generate/templates/index.d.ts b/cli/src/generate/templates/index.d.ts new file mode 100644 index 0000000..efe259e --- /dev/null +++ b/cli/src/generate/templates/index.d.ts @@ -0,0 +1,28 @@ +type BaseNode = { + type: string; + named: boolean; +}; + +type ChildNode = { + multiple: boolean; + required: boolean; + types: BaseNode[]; +}; + +type NodeInfo = + | (BaseNode & { + subtypes: BaseNode[]; + }) + | (BaseNode & { + fields: { [name: string]: ChildNode }; + children: ChildNode[]; + }); + +type Language = { + name: string; + language: unknown; + nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/cli/src/generate/templates/index.js b/cli/src/generate/templates/index.js index bc5daf7..6657bcf 100644 --- a/cli/src/generate/templates/index.js +++ b/cli/src/generate/templates/index.js @@ -1,18 +1,6 @@ -try { - module.exports = require("../../build/Release/tree_sitter_PARSER_NAME_binding"); -} catch (error1) { - if (error1.code !== 'MODULE_NOT_FOUND') { - throw error1; - } - try { - module.exports = require("../../build/Debug/tree_sitter_PARSER_NAME_binding"); - } catch (error2) { - if (error2.code !== 'MODULE_NOT_FOUND') { - throw error2; - } - throw error1 - } -} +const root = 
require("path").join(__dirname, "..", ".."); + +module.exports = require("node-gyp-build")(root); try { module.exports.nodeTypeInfo = require("../../src/node-types.json"); diff --git a/cli/src/generate/templates/js-binding.cc b/cli/src/generate/templates/js-binding.cc new file mode 100644 index 0000000..5b167cc --- /dev/null +++ b/cli/src/generate/templates/js-binding.cc @@ -0,0 +1,20 @@ +#include + +typedef struct TSLanguage TSLanguage; + +extern "C" TSLanguage *tree_sitter_PARSER_NAME(); + +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { + 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports["name"] = Napi::String::New(env, "PARSER_NAME"); + auto language = Napi::External::New(env, tree_sitter_PARSER_NAME()); + language.TypeTag(&LANGUAGE_TYPE_TAG); + exports["language"] = language; + return exports; +} + +NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init) diff --git a/cli/src/generate/templates/lib.rs b/cli/src/generate/templates/lib.rs index dab87e4..f5ce6a5 100644 --- a/cli/src/generate/templates/lib.rs +++ b/cli/src/generate/templates/lib.rs @@ -1,13 +1,15 @@ -//! This crate provides PARSER_NAME language support for the [tree-sitter][] parsing library. +//! This crate provides CAMEL_PARSER_NAME language support for the [tree-sitter][] parsing library. //! //! Typically, you will use the [language][language func] function to add this language to a //! tree-sitter [Parser][], and then use the parser to parse some code: //! //! ``` -//! let code = ""; +//! let code = r#" +//! "#; //! let mut parser = tree_sitter::Parser::new(); -//! parser.set_language(tree_sitter_PARSER_NAME::language()).expect("Error loading PARSER_NAME grammar"); +//! parser.set_language(&tree_sitter_PARSER_NAME::language()).expect("Error loading CAMEL_PARSER_NAME grammar"); //! let tree = parser.parse(code, None).unwrap(); +//! assert!(!tree.root_node().has_error()); //! ``` //! //! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html @@ -31,14 +33,14 @@ pub fn language() -> Language { /// The content of the [`node-types.json`][] file for this grammar. 
/// /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types -pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json"); +pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); // Uncomment these to include any queries that this grammar contains -// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); -// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); -// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); -// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); +// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); +// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm"); +// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm"); +// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); #[cfg(test)] mod tests { @@ -46,7 +48,7 @@ mod tests { fn test_can_load_grammar() { let mut parser = tree_sitter::Parser::new(); parser - .set_language(super::language()) - .expect("Error loading PARSER_NAME language"); + .set_language(&super::language()) + .expect("Error loading CAMEL_PARSER_NAME grammar"); } } diff --git a/cli/src/generate/templates/makefile b/cli/src/generate/templates/makefile new file mode 100644 index 0000000..0492ec0 --- /dev/null +++ b/cli/src/generate/templates/makefile @@ -0,0 +1,112 @@ +VERSION := 0.0.1 + +LANGUAGE_NAME := tree-sitter-PARSER_NAME + +# repository +SRC_DIR := src + +PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null) + +ifeq ($(PARSER_URL),) + PARSER_URL := $(subst .git,,$(PARSER_REPO_URL)) +ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),) + PARSER_URL := $(subst :,/,$(PARSER_URL)) + PARSER_URL := $(subst git@,https://,$(PARSER_URL)) +endif +endif + +TS ?= tree-sitter + +# ABI versioning +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# source/object files +PARSER := $(SRC_DIR)/parser.c +EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c)) +OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS)) + +# flags +ARFLAGS ?= rcs +override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC + +# OS-specific bits +ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED := $(LINKSHARED)-dynamiclib -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS), + endif + LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks +else + SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED := $(LINKSHARED)-shared -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS) + endif + LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR) +endif +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc + +lib$(LANGUAGE_NAME).a: $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + 
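The Rust binding sidesteps this makefile entirely: cargo drives the same compilation through the build.rs template using the cc crate pinned above (cc = "1.0.87"). A minimal sketch of that flow, assuming the standard src/parser.c layout and an optional external scanner:

```rust
// build.rs sketch: compile the generated C parser (plus scanner.c, if
// the grammar has one) into a static library the Rust crate links.
fn main() {
    let src_dir = std::path::Path::new("src");
    let mut build = cc::Build::new();
    build.include(src_dir);
    build.file(src_dir.join("parser.c"));
    let scanner = src_dir.join("scanner.c");
    if scanner.exists() {
        build.file(&scanner);
        println!("cargo:rerun-if-changed=src/scanner.c");
    }
    println!("cargo:rerun-if-changed=src/parser.c");
    build.compile("tree-sitter-PARSER_NAME");
}
```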
+lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ +ifneq ($(STRIP),) + $(STRIP) $@ +endif + +$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in + sed -e 's|@URL@|$(PARSER_URL)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@REQUIRES@|$(REQUIRES)|' \ + -e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +$(PARSER): $(SRC_DIR)/grammar.json + $(TS) generate --no-bindings $^ + +install: all + install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' + install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h + install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a + install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) + +uninstall: + $(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ + '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ + '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + +clean: + $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) + +test: + $(TS) test + +.PHONY: all install uninstall clean test diff --git a/cli/src/generate/templates/package.json b/cli/src/generate/templates/package.json index 1859879..39b5f71 100644 --- a/cli/src/generate/templates/package.json +++ b/cli/src/generate/templates/package.json @@ -1,19 +1,53 @@ { "name": "tree-sitter-PARSER_NAME", "version": "0.0.1", - "description": "PARSER_NAME grammar for tree-sitter", + "description": "CAMEL_PARSER_NAME grammar for tree-sitter", + "repository": "github:tree-sitter/tree-sitter-PARSER_NAME", + "license": "MIT", "main": "bindings/node", + "types": "bindings/node", "keywords": [ + "incremental", "parsing", - "incremental" + "tree-sitter", + "LOWER_PARSER_NAME" + ], + "files": [ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**" ], "dependencies": { - "nan": "^2.12.1" + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" }, "devDependencies": { + "prebuildify": "^6.0.0", "tree-sitter-cli": "^CLI_VERSION" }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + }, "scripts": { - "test": "tree-sitter test" - } + "install": "node-gyp-build", + "prebuildify": "prebuildify --napi --strip", + "build": "tree-sitter generate --no-bindings", + "build-wasm": "tree-sitter build --wasm", + "test": "tree-sitter test", + "parse": "tree-sitter parse" + }, + "tree-sitter": [ + { + "scope": "source.LOWER_PARSER_NAME", + "injection-regex": "^LOWER_PARSER_NAME$" + } + ] } diff --git a/cli/src/generate/templates/py-binding.c b/cli/src/generate/templates/py-binding.c new file mode 100644 index 0000000..e2fed9b --- /dev/null +++ b/cli/src/generate/templates/py-binding.c @@ -0,0 +1,27 @@ +#include + +typedef struct TSLanguage TSLanguage; + +TSLanguage *tree_sitter_LOWER_PARSER_NAME(void); + +static 
PyObject* _binding_language(PyObject *self, PyObject *args) { + return PyLong_FromVoidPtr(tree_sitter_LOWER_PARSER_NAME()); +} + +static PyMethodDef methods[] = { + {"language", _binding_language, METH_NOARGS, + "Get the tree-sitter language for this grammar."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_binding", + .m_doc = NULL, + .m_size = -1, + .m_methods = methods +}; + +PyMODINIT_FUNC PyInit__binding(void) { + return PyModule_Create(&module); +} diff --git a/cli/src/generate/templates/pyproject.toml b/cli/src/generate/templates/pyproject.toml new file mode 100644 index 0000000..272dbb1 --- /dev/null +++ b/cli/src/generate/templates/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "tree-sitter-PARSER_NAME" +description = "CAMEL_PARSER_NAME grammar for tree-sitter" +version = "0.0.1" +keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Topic :: Software Development :: Compilers", + "Topic :: Text Processing :: Linguistic", + "Typing :: Typed" +] +requires-python = ">=3.8" +license.text = "MIT" +readme = "README.md" + +[project.urls] +Homepage = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME" + +[project.optional-dependencies] +core = ["tree-sitter~=0.21"] + +[tool.cibuildwheel] +build = "cp38-*" +build-frontend = "build" diff --git a/cli/src/generate/templates/setup.py b/cli/src/generate/templates/setup.py new file mode 100644 index 0000000..85547e7 --- /dev/null +++ b/cli/src/generate/templates/setup.py @@ -0,0 +1,60 @@ +from os.path import isdir, join +from platform import system + +from setuptools import Extension, find_packages, setup +from setuptools.command.build import build +from wheel.bdist_wheel import bdist_wheel + + +class Build(build): + def run(self): + if isdir("queries"): + dest = join(self.build_lib, "tree_sitter_PARSER_NAME", "queries") + self.copy_tree("queries", dest) + super().run() + + +class BdistWheel(bdist_wheel): + def get_tag(self): + python, abi, platform = super().get_tag() + if python.startswith("cp"): + python, abi = "cp38", "abi3" + return python, abi, platform + + +setup( + packages=find_packages("bindings/python"), + package_dir={"": "bindings/python"}, + package_data={ + "tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"], + "tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"], + }, + ext_package="tree_sitter_LOWER_PARSER_NAME", + ext_modules=[ + Extension( + name="_binding", + sources=[ + "bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c", + "src/parser.c", + # NOTE: if your language uses an external scanner, add it here. 
+ ], + extra_compile_args=[ + "-std=c11", + ] if system() != "Windows" else [ + "/std:c11", + "/utf-8", + ], + define_macros=[ + ("Py_LIMITED_API", "0x03080000"), + ("PY_SSIZE_T_CLEAN", None) + ], + include_dirs=["src"], + py_limited_api=True, + ) + ], + cmdclass={ + "build": Build, + "bdist_wheel": BdistWheel + }, + zip_safe=False +) diff --git a/cli/src/highlight.rs b/cli/src/highlight.rs index 6cf1580..f350778 100644 --- a/cli/src/highlight.rs +++ b/cli/src/highlight.rs @@ -1,19 +1,16 @@ -use super::util; +use std::{ + collections::HashMap, fmt::Write, fs, io, path, str, sync::atomic::AtomicUsize, time::Instant, +}; + use ansi_term::Color; use anyhow::Result; use lazy_static::lazy_static; -use serde::ser::SerializeMap; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{json, Value}; -use std::collections::HashMap; -use std::fmt::Write; -use std::sync::atomic::AtomicUsize; -use std::time::Instant; -use std::{fs, io, path, str, usize}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; use tree_sitter_loader::Loader; -pub const HTML_HEADER: &'static str = " +pub const HTML_HEADER: &str = " Tree-sitter Highlighting @@ -35,7 +32,7 @@ pub const HTML_HEADER: &'static str = " "; -pub const HTML_FOOTER: &'static str = " +pub const HTML_FOOTER: &str = " "; @@ -68,13 +65,14 @@ impl Theme { Ok(serde_json::from_str(&json).unwrap_or_default()) } + #[must_use] pub fn default_style(&self) -> Style { Style::default() } } impl<'de> Deserialize<'de> for Theme { - fn deserialize(deserializer: D) -> std::result::Result + fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de>, { @@ -144,9 +142,7 @@ impl Serialize for Theme { impl Default for Theme { fn default() -> Self { - serde_json::from_str( - r#" - { + serde_json::from_value(json!({ "attribute": {"color": 124, "italic": true}, "comment": {"color": 245, "italic": true}, "constant.builtin": {"color": 94, "bold": true}, @@ -169,9 +165,7 @@ impl Default for Theme { "type.builtin": {"color": 23, "bold": true}, "variable.builtin": {"bold": true}, "variable.parameter": {"underline": true} - } - "#, - ) + })) .unwrap() } } @@ -182,17 +176,17 @@ fn parse_style(style: &mut Style, json: Value) { match property_name.as_str() { "bold" => { if value == Value::Bool(true) { - style.ansi = style.ansi.bold() + style.ansi = style.ansi.bold(); } } "italic" => { if value == Value::Bool(true) { - style.ansi = style.ansi.italic() + style.ansi = style.ansi.italic(); } } "underline" => { if value == Value::Bool(true) { - style.ansi = style.ansi.underline() + style.ansi = style.ansi.underline(); } } "color" => { @@ -220,10 +214,7 @@ fn parse_style(style: &mut Style, json: Value) { fn parse_color(json: Value) -> Option { match json { - Value::Number(n) => match n.as_u64() { - Some(n) => Some(Color::Fixed(n as u8)), - _ => None, - }, + Value::Number(n) => n.as_u64().map(|n| Color::Fixed(n as u8)), Value::String(s) => match s.to_lowercase().as_str() { "black" => Some(Color::Black), "blue" => Some(Color::Blue), @@ -234,7 +225,7 @@ fn parse_color(json: Value) -> Option { "white" => Some(Color::White), "yellow" => Some(Color::Yellow), s => { - if let Some((red, green, blue)) = hex_string_to_rgb(&s) { + if let Some((red, green, blue)) = hex_string_to_rgb(s) { Some(Color::RGB(red, green, blue)) } else { None @@ -246,7 +237,7 @@ fn parse_color(json: Value) -> Option { } fn hex_string_to_rgb(s: &str) -> 
Option<(u8, u8, u8)> { - if s.starts_with("#") && s.len() >= 7 { + if s.starts_with('#') && s.len() >= 7 { if let (Ok(red), Ok(green), Ok(blue)) = ( u8::from_str_radix(&s[1..3], 16), u8::from_str_radix(&s[3..5], 16), @@ -281,7 +272,7 @@ fn style_to_css(style: ansi_term::Style) -> String { fn write_color(buffer: &mut String, color: Color) { if let Color::RGB(r, g, b) = &color { - write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap() + write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(); } else { write!( buffer, @@ -299,18 +290,14 @@ fn write_color(buffer: &mut String, color: Color) { Color::RGB(_, _, _) => unreachable!(), } ) - .unwrap() + .unwrap(); } } fn terminal_supports_truecolor() -> bool { - use std::env; - - if let Ok(truecolor) = env::var("COLORTERM") { + std::env::var("COLORTERM").map_or(false, |truecolor| { truecolor == "truecolor" || truecolor == "24bit" - } else { - false - } + }) } fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color { @@ -324,9 +311,9 @@ fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color { // Get the xterm color with the minimum Euclidean distance to the target color // i.e. distance = √ (r2 - r1)² + (g2 - g1)² + (b2 - b1)² let distances = colors.map(|(color_id, (r, g, b))| { - let r_delta: u32 = (max(r, red) - min(r, red)).into(); - let g_delta: u32 = (max(g, green) - min(g, green)).into(); - let b_delta: u32 = (max(b, blue) - min(b, blue)).into(); + let r_delta = (max(r, red) - min(r, red)) as u32; + let g_delta = (max(g, green) - min(g, green)) as u32; + let b_delta = (max(b, blue) - min(b, blue)) as u32; let distance = r_delta.pow(2) + g_delta.pow(2) + b_delta.pow(2); // don't need to actually take the square root for the sake of comparison (color_id, distance) @@ -385,40 +372,38 @@ pub fn html( config: &HighlightConfiguration, quiet: bool, print_time: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result<()> { use std::io::Write; let stdout = io::stdout(); let mut stdout = stdout.lock(); let time = Instant::now(); - let cancellation_flag = util::cancel_on_stdin(); let mut highlighter = Highlighter::new(); - let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + let events = highlighter.highlight(config, source, cancellation_flag, |string| { loader.highlight_config_for_injection_string(string) })?; let mut renderer = HtmlRenderer::new(); renderer.render(events, source, &move |highlight| { - if let Some(css_style) = &theme.styles[highlight.0].css { - css_style.as_bytes() - } else { - "".as_bytes() - } + theme.styles[highlight.0] + .css + .as_ref() + .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()) })?; if !quiet { - write!(&mut stdout, "\n")?; + writeln!(&mut stdout, "
")?; for (i, line) in renderer.lines().enumerate() { - write!( + writeln!( &mut stdout, - "\n", + "", i + 1, - line )?; } - write!(&mut stdout, "
{}{}
{}{line}
\n")?; + writeln!(&mut stdout, "")?; } if print_time { @@ -430,11 +415,12 @@ pub fn html( #[cfg(test)] mod tests { - use super::*; use std::env; - const JUNGLE_GREEN: &'static str = "#26A69A"; - const DARK_CYAN: &'static str = "#00AF87"; + use super::*; + + const JUNGLE_GREEN: &str = "#26A69A"; + const DARK_CYAN: &str = "#00AF87"; #[test] fn test_parse_style() { @@ -448,7 +434,7 @@ mod tests { env::set_var("COLORTERM", ""); parse_style(&mut style, Value::String(DARK_CYAN.to_string())); assert_eq!(style.ansi.foreground, Some(Color::Fixed(36))); - assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string())); + assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string())); // junglegreen is not an ANSI color and is preserved when the terminal supports it env::set_var("COLORTERM", "truecolor"); diff --git a/cli/src/lib.rs b/cli/src/lib.rs index d36417c..549db77 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod generate; pub mod highlight; pub mod logger; @@ -14,3 +16,7 @@ pub mod wasm; #[cfg(test)] mod tests; + +// To run compile fail tests +#[cfg(doctest)] +mod tests; diff --git a/cli/src/main.rs b/cli/src/main.rs index fb2a632..94df332 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,21 +1,373 @@ +use std::{ + collections::HashSet, + env, fs, + path::{Path, PathBuf}, +}; + +use anstyle::{AnsiColor, Color, Style}; use anyhow::{anyhow, Context, Result}; -use clap::{App, AppSettings, Arg, SubCommand}; +use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand}; use glob::glob; -use std::path::{Path, PathBuf}; -use std::{env, fs, u64}; -use tree_sitter::Point; -use tree_sitter_cli::parse::ParseOutput; +use regex::Regex; +use tree_sitter::{ffi, Parser, Point}; use tree_sitter_cli::{ - generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, - util, wasm, + generate::{self, lookup_package_json_for_path}, + highlight, logger, + parse::{self, ParseFileOptions, ParseOutput}, + playground, query, tags, + test::{self, TestOptions}, + test_highlight, test_tags, util, wasm, }; use tree_sitter_config::Config; +use tree_sitter_highlight::Highlighter; use tree_sitter_loader as loader; +use tree_sitter_tags::TagsContext; -const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); +const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION"); const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); const DEFAULT_GENERATE_ABI_VERSION: usize = 14; +#[derive(Subcommand)] +#[command(about="Generates and tests parsers", author=crate_authors!("\n"), styles=get_styles())] +enum Commands { + InitConfig(InitConfig), + Generate(Generate), + Build(Build), + BuildWasm(BuildWasm), + Parse(Parse), + Test(Test), + Query(Query), + Highlight(Highlight), + Tags(Tags), + Playground(Playground), + DumpLanguages(DumpLanguages), +} + +#[derive(Args)] +#[command(about = "Generate a default config file")] +struct InitConfig; + +#[derive(Args)] +#[command(about = "Generate a parser", alias = "gen", alias = "g")] +struct Generate { + #[arg(index = 1, help = "The path to the grammar file")] + pub grammar_path: Option, + #[arg(long, short, help = "Show debug log during generation")] + pub log: bool, + #[arg( + long = "abi", + value_name = "VERSION", + help = format!(concat!( + "Select the language ABI version to generate (default {}).\n", + "Use --abi=latest to generate the newest supported version ({}).", + ), + DEFAULT_GENERATE_ABI_VERSION, + tree_sitter::LANGUAGE_VERSION, + ) + )] + pub 
abi_version: Option, + #[arg(long, help = "Don't generate language bindings")] + pub no_bindings: bool, + #[arg( + long, + short = 'b', + help = "Compile all defined languages in the current dir" + )] + pub build: bool, + #[arg(long, short = '0', help = "Compile a parser in debug mode")] + pub debug_build: bool, + #[arg( + long, + value_name = "PATH", + help = "The path to the directory containing the parser library" + )] + pub libdir: Option, + #[arg( + long, + help = "Produce a report of the states for the given rule, use `-` to report every rule" + )] + pub report_states_for_rule: Option, + + #[arg( + long, + value_name = "EXECUTABLE", + env = "TREE_SITTER_JS_RUNTIME", + help = "The path to the JavaScript runtime to use for generating parsers" + )] + pub js_runtime: Option, +} + +#[derive(Args)] +#[command(about = "Compile a parser", alias = "b")] +struct Build { + #[arg(short, long, help = "Build a WASM module instead of a dynamic library")] + pub wasm: bool, + #[arg( + short, + long, + help = "Run emscripten via docker even if it is installed locally (only if building a WASM module with --wasm)" + )] + pub docker: bool, + #[arg(short, long, help = "The path to output the compiled file")] + pub output: Option, + #[arg(index = 1, num_args = 1, help = "The path to the grammar directory")] + pub path: Option, + #[arg(long, help = "Make the parser reuse the same allocator as the library")] + pub reuse_allocator: bool, + #[arg( + long, + help = "Build the parser with `TREE_SITTER_INTERNAL_BUILD` defined" + )] + pub internal_build: bool, +} + +#[derive(Args)] +#[command(about = "Compile a parser to WASM", alias = "bw")] +struct BuildWasm { + #[arg( + short, + long, + help = "Run emscripten via docker even if it is installed locally" + )] + pub docker: bool, + #[arg(index = 1, num_args = 1, help = "The path to output the wasm file")] + pub path: Option, +} + +#[derive(Args)] +#[command(about = "Parse files", alias = "p")] +struct Parse { + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args=1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short = 'd', help = "Show parsing debug log")] + pub debug: bool, + #[arg(long, short = '0', help = "Compile a parser in debug mode")] + pub debug_build: bool, + #[arg( + long, + short = 'D', + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic libraries" + )] + pub wasm: bool, + #[arg(long = "dot", help = "Output the parse data with graphviz dot")] + pub output_dot: bool, + #[arg( + long = "xml", + short = 'x', + help = "Output the parse data in XML format" + )] + pub output_xml: bool, + #[arg(long, short, help = "Show parsing statistic")] + pub stat: bool, + #[arg(long, help = "Interrupt the parsing process by timeout (µs)")] + pub timeout: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long, + num_args = 1.., + help = "Apply edits in the format: \"row, col delcount insert_text\"" + )] + pub edits: Option>, + #[arg(long, help = "The encoding of the input files")] + pub encoding: Option, + #[arg( + long, + help = "Open `log.html` in the default browser, if `--debug-graph` is supplied" + )] + pub open_log: bool, + 
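// The fields above follow clap's derive API. A minimal sketch of the same
// pattern, using a hypothetical `Demo` struct that is not part of this patch:
//
//     #[derive(clap::Args)]
//     struct Demo {
//         #[arg(long, short = 'd', help = "Show parsing debug log")]
//         pub debug: bool,
//         #[arg(long, help = "Interrupt the parsing process by timeout (µs)")]
//         pub timeout: Option<u64>,
//     }
//
// Each `#[arg(...)]` attribute replaces an `Arg::with_name(...)` builder chain
// from the clap 2.x code deleted further down in this diff.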
#[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, + #[arg(long, short = 'n', help = "Parse the contents of a specific test")] + #[clap(conflicts_with = "paths", conflicts_with = "paths_file")] + pub test_number: Option, +} + +#[derive(Args)] +#[command(about = "Run a parser's tests", alias = "t")] +struct Test { + #[arg( + long, + short, + help = "Only run corpus test cases whose name includes the given string" + )] + pub filter: Option, + #[arg( + long, + short, + help = "Only run corpus test cases whose name matches the given regex" + )] + pub include: Option, + #[arg( + long, + short, + help = "Only run corpus test cases whose name does not match the given regex" + )] + pub exclude: Option, + #[arg( + long, + short, + help = "Update all syntax trees in corpus files with current parser output" + )] + pub update: bool, + #[arg(long, short = 'd', help = "Show parsing debug log")] + pub debug: bool, + #[arg(long, short = '0', help = "Compile a parser in debug mode")] + pub debug_build: bool, + #[arg( + long, + short = 'D', + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic libraries" + )] + pub wasm: bool, + #[arg( + long, + help = "Open `log.html` in the default browser, if `--debug-graph` is supplied" + )] + pub open_log: bool, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Search files using a syntax tree query", alias = "q")] +struct Query { + #[arg(help = "Path to a file with queries", index = 1, required = true)] + query_path: String, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(index = 2, num_args=1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg( + long, + help = "The range of byte offsets in which the query will be executed" + )] + pub byte_range: Option, + #[arg(long, help = "The range of rows in which the query will be executed")] + pub row_range: Option, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Order by captures instead of matches")] + pub captures: bool, + #[arg(long, help = "Whether to run query tests or not")] + pub test: bool, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Highlight a file", alias = "hi")] +struct Highlight { + #[arg(long, short = 'H', help = "Generate highlighting as an HTML document")] + pub html: bool, + #[arg( + long, + help = "Check that highlighting captures conform strictly to standards" + )] + pub check: bool, + #[arg(long, help = "The path to a file with captures")] + pub captures_path: Option, + #[arg(long, num_args = 1.., help = "The paths to files with queries")] + pub query_paths: Option>, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args 
= 1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Generate a list of tags")] +struct Tags { + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args = 1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command( + about = "Start local playground for a parser in the browser", + alias = "play", + alias = "pg", + alias = "web-ui" +)] +struct Playground { + #[arg(long, short, help = "Don't open in default browser")] + pub quiet: bool, + #[arg( + long, + help = "Path to the directory containing the grammar and wasm files" + )] + pub grammar_path: Option, +} + +#[derive(Args)] +#[command(about = "Print info about all known language parsers", alias = "langs")] +struct DumpLanguages { + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + fn main() { let result = run(); if let Err(err) = &result { @@ -26,255 +378,45 @@ fn main() { } } if !err.to_string().is_empty() { - eprintln!("{:?}", err); + eprintln!("{err:?}"); } std::process::exit(1); } } fn run() -> Result<()> { - let version = if let Some(build_sha) = BUILD_SHA { - format!("{} ({})", BUILD_VERSION, build_sha) - } else { - BUILD_VERSION.to_string() - }; - - let debug_arg = Arg::with_name("debug") - .help("Show parsing debug log") - .long("debug") - .short("d"); - - let debug_graph_arg = Arg::with_name("debug-graph") - .help("Produce the log.html file with debug graphs") - .long("debug-graph") - .short("D"); - - let debug_build_arg = Arg::with_name("debug-build") - .help("Compile a parser in debug mode") - .long("debug-build") - .short("0"); - - let paths_file_arg = Arg::with_name("paths-file") - .help("The path to a file with paths to source file(s)") - .long("paths") - .takes_value(true); - - let paths_arg = Arg::with_name("paths") - .help("The source file(s) to use") - .multiple(true); - - let scope_arg = Arg::with_name("scope") - .help("Select a language by the scope instead of a file extension") - .long("scope") - .takes_value(true); - - let time_arg = Arg::with_name("time") - .help("Measure execution time") - .long("time") - .short("t"); - - let quiet_arg = Arg::with_name("quiet") - .help("Suppress main output") - .long("quiet") - .short("q"); - - let matches = App::new("tree-sitter") - .author("Max Brunsfeld ") - .about("Generates and tests parsers") - .version(version.as_str()) - .setting(AppSettings::SubcommandRequiredElseHelp) - .global_setting(AppSettings::ColoredHelp) - .global_setting(AppSettings::DeriveDisplayOrder) - .global_setting(AppSettings::DisableHelpSubcommand) - .subcommand(SubCommand::with_name("init-config").about("Generate a default config file")) - .subcommand( - SubCommand::with_name("generate") - .alias("gen") - .alias("g") - .about("Generate a parser") - .arg(Arg::with_name("grammar-path").index(1)) - .arg(Arg::with_name("log").long("log")) - .arg( - Arg::with_name("abi-version") - .long("abi") - .value_name("version") - .help(&format!( - concat!( - "Select 
the language ABI version to generate (default {}).\n", - "Use --abi=latest to generate the newest supported version ({}).", - ), - DEFAULT_GENERATE_ABI_VERSION, - tree_sitter::LANGUAGE_VERSION, - )), - ) - .arg(Arg::with_name("no-bindings").long("no-bindings")) - .arg( - Arg::with_name("build") - .long("build") - .short("b") - .help("Compile all defined languages in the current dir"), - ) - .arg(&debug_build_arg) - .arg( - Arg::with_name("libdir") - .long("libdir") - .takes_value(true) - .value_name("path"), - ) - .arg( - Arg::with_name("report-states-for-rule") - .long("report-states-for-rule") - .value_name("rule-name") - .takes_value(true), - ), - ) - .subcommand( - SubCommand::with_name("parse") - .alias("p") - .about("Parse files") - .arg(&paths_file_arg) - .arg(&paths_arg) - .arg(&scope_arg) - .arg(&debug_arg) - .arg(&debug_build_arg) - .arg(&debug_graph_arg) - .arg(Arg::with_name("output-dot").long("dot")) - .arg(Arg::with_name("output-xml").long("xml").short("x")) - .arg( - Arg::with_name("stat") - .help("Show parsing statistic") - .long("stat") - .short("s"), - ) - .arg( - Arg::with_name("timeout") - .help("Interrupt the parsing process by timeout (µs)") - .long("timeout") - .takes_value(true), - ) - .arg(&time_arg) - .arg(&quiet_arg) - .arg( - Arg::with_name("edits") - .help("Apply edits in the format: \"row,col del_count insert_text\"") - .long("edit") - .short("edit") - .takes_value(true) - .multiple(true) - .number_of_values(1), - ), - ) - .subcommand( - SubCommand::with_name("query") - .alias("q") - .about("Search files using a syntax tree query") - .arg( - Arg::with_name("query-path") - .help("Path to a file with queries") - .index(1) - .required(true), - ) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg.clone().index(2)) - .arg( - Arg::with_name("byte-range") - .help("The range of byte offsets in which the query will be executed") - .long("byte-range") - .takes_value(true), - ) - .arg( - Arg::with_name("row-range") - .help("The range of rows in which the query will be executed") - .long("row-range") - .takes_value(true), - ) - .arg(&scope_arg) - .arg(Arg::with_name("captures").long("captures").short("c")) - .arg(Arg::with_name("test").long("test")), - ) - .subcommand( - SubCommand::with_name("tags") - .about("Generate a list of tags") - .arg(&scope_arg) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg), - ) - .subcommand( - SubCommand::with_name("test") - .alias("t") - .about("Run a parser's tests") - .arg( - Arg::with_name("filter") - .long("filter") - .short("f") - .takes_value(true) - .help("Only run corpus test cases whose name includes the given string"), - ) - .arg( - Arg::with_name("update") - .long("update") - .short("u") - .help("Update all syntax trees in corpus files with current parser output"), - ) - .arg(&debug_arg) - .arg(&debug_build_arg) - .arg(&debug_graph_arg), - ) - .subcommand( - SubCommand::with_name("highlight") - .about("Highlight a file") - .arg( - Arg::with_name("html") - .help("Generate highlighting as an HTML document") - .long("html") - .short("H"), - ) - .arg(&scope_arg) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg), - ) - .subcommand( - SubCommand::with_name("build-wasm") - .alias("bw") - .about("Compile a parser to WASM") - .arg( - Arg::with_name("docker") - .long("docker") - .help("Run emscripten via docker even if it is installed locally"), - ) - .arg(Arg::with_name("path").index(1).multiple(true)), - ) - .subcommand( - 
SubCommand::with_name("playground") - .alias("play") - .alias("pg") - .alias("web-ui") - .about("Start local playground for a parser in the browser") - .arg( - Arg::with_name("quiet") - .long("quiet") - .short("q") - .help("Don't open in default browser"), - ), - ) - .subcommand( - SubCommand::with_name("dump-languages") - .about("Print info about all known language parsers"), + let version = BUILD_SHA.map_or_else( + || BUILD_VERSION.to_string(), + |build_sha| format!("{BUILD_VERSION} ({build_sha})"), + ); + let version: &'static str = Box::leak(version.into_boxed_str()); + + let cli = Command::new("tree-sitter") + .help_template( + "\ +{before-help}{name} {version} +{author-with-newline}{about-with-newline} +{usage-heading} {usage} + +{all-args}{after-help} +", ) - .get_matches(); + .version(version) + .subcommand_required(true) + .arg_required_else_help(true) + .disable_help_subcommand(true) + .disable_colored_help(false); + let cli = Commands::augment_subcommands(cli); + + let command = Commands::from_arg_matches(&cli.get_matches())?; let current_dir = env::current_dir().unwrap(); - let config = Config::load()?; let mut loader = loader::Loader::new()?; - match matches.subcommand() { - ("init-config", Some(_)) => { + let color = env::var("NO_COLOR").map_or(true, |v| v != "1"); + + match command { + Commands::InitConfig(_) => { if let Ok(Some(config_path)) = Config::find_config_file() { return Err(anyhow!( "Remove your existing config file first: {}", @@ -291,171 +433,227 @@ fn run() -> Result<()> { ); } - ("generate", Some(matches)) => { - let grammar_path = matches.value_of("grammar-path"); - let debug_build = matches.is_present("debug-build"); - let build = matches.is_present("build"); - let libdir = matches.value_of("libdir"); - let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { - if matches.is_present("report-states") { - Some("") - } else { - None - } - }); - if matches.is_present("log") { + Commands::Generate(generate_options) => { + if generate_options.log { logger::init(); } - let abi_version = - matches - .value_of("abi-version") - .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { - if version == "latest" { - tree_sitter::LANGUAGE_VERSION - } else { - version.parse().expect("invalid abi version flag") - } - }); - let generate_bindings = !matches.is_present("no-bindings"); + let abi_version = generate_options.abi_version.as_ref().map_or( + DEFAULT_GENERATE_ABI_VERSION, + |version| { + if version == "latest" { + tree_sitter::LANGUAGE_VERSION + } else { + version.parse().expect("invalid abi version flag") + } + }, + ); generate::generate_parser_in_directory( ¤t_dir, - grammar_path, + generate_options.grammar_path.as_deref(), abi_version, - generate_bindings, - report_symbol_name, + !generate_options.no_bindings, + generate_options.report_states_for_rule.as_deref(), + generate_options.js_runtime.as_deref(), )?; - if build { - if let Some(path) = libdir { + if generate_options.build { + if let Some(path) = generate_options.libdir { loader = loader::Loader::with_parser_lib_path(PathBuf::from(path)); } - loader.use_debug_build(debug_build); + loader.use_debug_build(generate_options.debug_build); loader.languages_at_path(¤t_dir)?; } } - ("test", Some(matches)) => { - let debug = matches.is_present("debug"); - let debug_graph = matches.is_present("debug-graph"); - let debug_build = matches.is_present("debug-build"); - let update = matches.is_present("update"); - let filter = matches.value_of("filter"); - - if debug { - // For augmenting debug logging in 
external scanners - env::set_var("TREE_SITTER_DEBUG", "1"); - } - - loader.use_debug_build(debug_build); - - let languages = loader.languages_at_path(¤t_dir)?; - let language = languages - .first() - .ok_or_else(|| anyhow!("No language found"))?; - let test_dir = current_dir.join("test"); - - // Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`. - let mut test_corpus_dir = test_dir.join("corpus"); - if !test_corpus_dir.is_dir() { - test_corpus_dir = current_dir.join("corpus"); - } - if test_corpus_dir.is_dir() { - test::run_tests_at_path( - *language, - &test_corpus_dir, - debug, - debug_graph, - filter, - update, + Commands::Build(build_options) => { + if build_options.wasm { + let grammar_path = + current_dir.join(build_options.path.as_deref().unwrap_or_default()); + let output_path = build_options.output.map(|path| current_dir.join(path)); + let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) + .map(|(p, _)| p.parent().unwrap().to_path_buf())?; + wasm::compile_language_to_wasm( + &loader, + Some(&root_path), + &grammar_path, + ¤t_dir, + output_path, + build_options.docker, )?; - } - - // Check that all of the queries are valid. - test::check_queries_at_path(*language, ¤t_dir.join("queries"))?; + } else { + let grammar_path = + current_dir.join(build_options.path.as_deref().unwrap_or_default()); + let output_path = if let Some(ref path) = build_options.output { + let path = Path::new(path); + if path.is_absolute() { + path.to_path_buf() + } else { + current_dir.join(path) + } + } else { + let file_name = grammar_path + .file_stem() + .unwrap() + .to_str() + .unwrap() + .strip_prefix("tree-sitter-") + .unwrap_or("parser"); + current_dir + .join(file_name) + .with_extension(env::consts::DLL_EXTENSION) + }; - // Run the syntax highlighting tests. - let test_highlight_dir = test_dir.join("highlight"); - if test_highlight_dir.is_dir() { - test_highlight::test_highlights(&loader, &test_highlight_dir)?; - } + let flags: &[&str] = + match (build_options.reuse_allocator, build_options.internal_build) { + (true, true) => { + &["TREE_SITTER_REUSE_ALLOCATOR", "TREE_SITTER_INTERNAL_BUILD"] + } + (true, false) => &["TREE_SITTER_REUSE_ALLOCATOR"], + (false, true) => &["TREE_SITTER_INTERNAL_BUILD"], + (false, false) => &[], + }; - let test_tag_dir = test_dir.join("tags"); - if test_tag_dir.is_dir() { - test_tags::test_tags(&loader, &test_tag_dir)?; + let config = Config::load(None)?; + let loader_config = config.get()?; + loader.find_all_languages(&loader_config).unwrap(); + loader + .compile_parser_at_path(&grammar_path, output_path, flags) + .unwrap(); } } - ("parse", Some(matches)) => { - let debug = matches.is_present("debug"); - let debug_graph = matches.is_present("debug-graph"); - let debug_build = matches.is_present("debug-build"); + Commands::BuildWasm(wasm_options) => { + eprintln!("`build-wasm` is deprecated and will be removed in v0.24.0. 
You should use `build --wasm` instead"); + let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default()); + let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) + .map(|(p, _)| p.parent().unwrap().to_path_buf())?; + wasm::compile_language_to_wasm( + &loader, + Some(&root_path), + &grammar_path, + ¤t_dir, + None, + wasm_options.docker, + )?; + } - let output = if matches.is_present("output-dot") { + Commands::Parse(parse_options) => { + let config = Config::load(parse_options.config_path)?; + let output = if parse_options.output_dot { ParseOutput::Dot - } else if matches.is_present("output-xml") { + } else if parse_options.output_xml { ParseOutput::Xml - } else if matches.is_present("quiet") { + } else if parse_options.quiet { ParseOutput::Quiet } else { ParseOutput::Normal }; - let time = matches.is_present("time"); - let edits = matches - .values_of("edits") - .map_or(Vec::new(), |e| e.collect()); - let cancellation_flag = util::cancel_on_stdin(); + let encoding = if let Some(encoding) = parse_options.encoding { + match encoding.as_str() { + "utf16" => Some(ffi::TSInputEncodingUTF16), + "utf8" => Some(ffi::TSInputEncodingUTF8), + _ => return Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")), + } + } else { + None + }; - if debug { + let time = parse_options.time; + let edits = parse_options.edits.unwrap_or_default(); + let cancellation_flag = util::cancel_on_signal(); + let mut parser = Parser::new(); + + if parse_options.debug { // For augmenting debug logging in external scanners env::set_var("TREE_SITTER_DEBUG", "1"); } - loader.use_debug_build(debug_build); + loader.use_debug_build(parse_options.debug_build); + + #[cfg(feature = "wasm")] + if parse_options.wasm { + let engine = tree_sitter::wasmtime::Engine::default(); + parser + .set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap()) + .unwrap(); + loader.use_wasm(engine); + } - let timeout = matches - .value_of("timeout") - .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); + let timeout = parse_options.timeout.unwrap_or_default(); - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let (paths, language) = if let Some(target_test) = parse_options.test_number { + let (test_path, language_names) = test::get_tmp_test_file(target_test, color)?; + let languages = loader.languages_at_path(¤t_dir)?; + let language = languages + .iter() + .find(|(_, n)| language_names.contains(&Box::from(n.as_str()))) + .map(|(l, _)| l.clone()); + let paths = + collect_paths(None, Some(vec![test_path.to_str().unwrap().to_owned()]))?; + (paths, language) + } else { + ( + collect_paths(parse_options.paths_file.as_deref(), parse_options.paths)?, + None, + ) + }; let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap_or(0); let mut has_error = false; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; - let should_track_stats = matches.is_present("stat"); + let should_track_stats = parse_options.stat; let mut stats = parse::Stats::default(); - for path in paths { + for path in &paths { let path = Path::new(&path); - let language = - loader.select_language(path, ¤t_dir, matches.value_of("scope"))?; - let this_file_errored = parse::parse_file_at_path( - language, + let language = if let Some(ref language) = language { + language.clone() + } else { + loader.select_language(path, ¤t_dir, parse_options.scope.as_deref())? 
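// When `--test-number` is given, the language was already resolved from the
// grammar directory above; otherwise each file is matched here either by
// `--scope` or by its file extension via `select_language`.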
+ }; + parser + .set_language(&language) + .context("incompatible language")?; + + let opts = ParseFileOptions { + language: language.clone(), path, - &edits, + edits: &edits + .iter() + .map(std::string::String::as_str) + .collect::>(), max_path_length, output, - time, + print_time: time, timeout, - debug, - debug_graph, - Some(&cancellation_flag), - )?; + debug: parse_options.debug, + debug_graph: parse_options.debug_graph, + cancellation_flag: Some(&cancellation_flag), + encoding, + open_log: parse_options.open_log, + }; + + let parse_result = parse::parse_file_at_path(&mut parser, &opts)?; if should_track_stats { stats.total_parses += 1; - if !this_file_errored { + if parse_result.successful { stats.successful_parses += 1; } + if let Some(duration) = parse_result.duration { + stats.total_bytes += parse_result.bytes; + stats.total_duration += duration; + } } - has_error |= this_file_errored; + has_error |= !parse_result.successful; } if should_track_stats { - println!("{}", stats) + println!("\n{stats}"); } if has_error { @@ -463,97 +661,204 @@ fn run() -> Result<()> { } } - ("query", Some(matches)) => { - let ordered_captures = matches.values_of("captures").is_some(); - let quiet = matches.values_of("quiet").is_some(); - let time = matches.values_of("time").is_some(); - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + Commands::Test(test_options) => { + let config = Config::load(test_options.config_path)?; + if test_options.debug { + // For augmenting debug logging in external scanners + env::set_var("TREE_SITTER_DEBUG", "1"); + } + + loader.use_debug_build(test_options.debug_build); + + let mut parser = Parser::new(); + + #[cfg(feature = "wasm")] + if test_options.wasm { + let engine = tree_sitter::wasmtime::Engine::default(); + parser + .set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap()) + .unwrap(); + loader.use_wasm(engine); + } + + let languages = loader.languages_at_path(¤t_dir)?; + let language = &languages + .first() + .ok_or_else(|| anyhow!("No language found"))? + .0; + parser.set_language(language)?; + + let test_dir = current_dir.join("test"); + + // Run the corpus tests. Look for them in `test/corpus`. + let test_corpus_dir = test_dir.join("corpus"); + if test_corpus_dir.is_dir() { + let mut opts = TestOptions { + path: test_corpus_dir, + debug: test_options.debug, + debug_graph: test_options.debug_graph, + filter: test_options.filter.as_deref(), + include: test_options.include, + exclude: test_options.exclude, + update: test_options.update, + open_log: test_options.open_log, + languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(), + color, + test_num: 1, + }; + + test::run_tests_at_path(&mut parser, &mut opts)?; + } + + // Check that all of the queries are valid. + test::check_queries_at_path(language, ¤t_dir.join("queries"))?; + + // Run the syntax highlighting tests. 
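// The single `Parser` instance is threaded through the highlight and tag
// stages below (`highlighter.parser = parser`, recovered afterwards with
// `parser = highlighter.parser`) instead of constructing a fresh parser
// for each stage.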
+ let test_highlight_dir = test_dir.join("highlight"); + if test_highlight_dir.is_dir() { + let mut highlighter = Highlighter::new(); + highlighter.parser = parser; + test_highlight::test_highlights( + &loader, + &config.get()?, + &mut highlighter, + &test_highlight_dir, + color, + )?; + parser = highlighter.parser; + } + + let test_tag_dir = test_dir.join("tags"); + if test_tag_dir.is_dir() { + let mut tags_context = TagsContext::new(); + tags_context.parser = parser; + test_tags::test_tags( + &loader, + &config.get()?, + &mut tags_context, + &test_tag_dir, + color, + )?; + } + } + + Commands::Query(query_options) => { + let config = Config::load(query_options.config_path)?; + let paths = collect_paths(query_options.paths_file.as_deref(), query_options.paths)?; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; let language = loader.select_language( Path::new(&paths[0]), ¤t_dir, - matches.value_of("scope"), + query_options.scope.as_deref(), )?; - let query_path = Path::new(matches.value_of("query-path").unwrap()); - let byte_range = matches.value_of("byte-range").and_then(|arg| { - let mut parts = arg.split(":"); + let query_path = Path::new(&query_options.query_path); + + let byte_range = query_options.byte_range.as_ref().and_then(|range| { + let mut parts = range.split(':'); let start = parts.next()?.parse().ok()?; let end = parts.next().unwrap().parse().ok()?; Some(start..end) }); - let point_range = matches.value_of("row-range").and_then(|arg| { - let mut parts = arg.split(":"); + let point_range = query_options.row_range.as_ref().and_then(|range| { + let mut parts = range.split(':'); let start = parts.next()?.parse().ok()?; let end = parts.next().unwrap().parse().ok()?; Some(Point::new(start, 0)..Point::new(end, 0)) }); - let should_test = matches.is_present("test"); + query::query_files_at_paths( - language, + &language, paths, query_path, - ordered_captures, + query_options.captures, byte_range, point_range, - should_test, - quiet, - time, + query_options.test, + query_options.quiet, + query_options.time, )?; } - ("tags", Some(matches)) => { - let loader_config = config.get()?; - loader.find_all_languages(&loader_config)?; - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; - tags::generate_tags( - &loader, - matches.value_of("scope"), - &paths, - matches.is_present("quiet"), - matches.is_present("time"), - )?; - } - - ("highlight", Some(matches)) => { + Commands::Highlight(highlight_options) => { + let config = Config::load(highlight_options.config_path)?; let theme_config: tree_sitter_cli::highlight::ThemeConfig = config.get()?; loader.configure_highlights(&theme_config.theme.highlight_names); let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; - let time = matches.is_present("time"); - let quiet = matches.is_present("quiet"); - let html_mode = quiet || matches.is_present("html"); - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let quiet = highlight_options.quiet; + let html_mode = quiet || highlight_options.html; + let paths = collect_paths( + highlight_options.paths_file.as_deref(), + highlight_options.paths, + )?; if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); - let mut lang = None; - if let Some(scope) = matches.value_of("scope") { - lang = loader.language_configuration_for_scope(scope)?; - if lang.is_none() { - return 
Err(anyhow!("Unknown scope '{}'", scope)); + let mut language = None; + if let Some(scope) = highlight_options.scope.as_deref() { + language = loader.language_configuration_for_scope(scope)?; + if language.is_none() { + return Err(anyhow!("Unknown scope '{scope}'")); } } for path in paths { let path = Path::new(&path); - let (language, language_config) = match lang { + let (language, language_config) = match language.clone() { Some(v) => v, - None => match loader.language_configuration_for_file_name(path)? { - Some(v) => v, - None => { - eprintln!("No language found for path {:?}", path); + None => { + if let Some(v) = loader.language_configuration_for_file_name(path)? { + v + } else { + eprintln!("{}", util::lang_not_found_for_path(path, &loader_config)); continue; } - }, + } }; - if let Some(highlight_config) = language_config.highlight_config(language)? { + if let Some(highlight_config) = language_config + .highlight_config(language, highlight_options.query_paths.as_deref())? + { + if highlight_options.check { + let names = if let Some(path) = highlight_options.captures_path.as_deref() { + let path = Path::new(path); + let file = fs::read_to_string(path)?; + let capture_names = file + .lines() + .filter_map(|line| { + if line.trim().is_empty() || line.trim().starts_with(';') { + return None; + } + line.split(';').next().map(|s| s.trim().trim_matches('"')) + }) + .collect::>(); + highlight_config.nonconformant_capture_names(&capture_names) + } else { + highlight_config.nonconformant_capture_names(&HashSet::new()) + }; + if names.is_empty() { + eprintln!("All highlight captures conform to standards."); + } else { + eprintln!( + "Non-standard highlight {} detected:", + if names.len() > 1 { + "captures" + } else { + "capture" + } + ); + for name in names { + eprintln!("* {name}"); + } + } + } + let source = fs::read(path)?; if html_mode { highlight::html( @@ -562,7 +867,8 @@ fn run() -> Result<()> { &source, highlight_config, quiet, - time, + highlight_options.time, + Some(&cancellation_flag), )?; } else { highlight::ansi( @@ -570,12 +876,12 @@ fn run() -> Result<()> { &theme_config.theme, &source, highlight_config, - time, + highlight_options.time, Some(&cancellation_flag), )?; } } else { - eprintln!("No syntax highlighting config found for path {:?}", path); + eprintln!("No syntax highlighting config found for path {path:?}"); } } @@ -584,17 +890,31 @@ fn run() -> Result<()> { } } - ("build-wasm", Some(matches)) => { - let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); - wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; + Commands::Tags(tags_options) => { + let config = Config::load(tags_options.config_path)?; + let loader_config = config.get()?; + loader.find_all_languages(&loader_config)?; + let paths = collect_paths(tags_options.paths_file.as_deref(), tags_options.paths)?; + tags::generate_tags( + &loader, + &config.get()?, + tags_options.scope.as_deref(), + &paths, + tags_options.quiet, + tags_options.time, + )?; } - ("playground", Some(matches)) => { - let open_in_browser = !matches.is_present("quiet"); - playground::serve(¤t_dir, open_in_browser); + Commands::Playground(playground_options) => { + let open_in_browser = !playground_options.quiet; + let grammar_path = playground_options + .grammar_path + .map_or(current_dir, PathBuf::from); + playground::serve(&grammar_path, open_in_browser)?; } - ("dump-languages", Some(_)) => { + Commands::DumpLanguages(dump_options) => { + let config = Config::load(dump_options.config_path)?; let 
loader_config = config.get()?; loader.find_all_languages(&loader_config)?; for (configuration, language_path) in loader.get_all_language_configurations() { @@ -616,20 +936,47 @@ fn run() -> Result<()> { ); } } - - _ => unreachable!(), } Ok(()) } -fn collect_paths<'a>( - paths_file: Option<&str>, - paths: Option>, -) -> Result> { +#[must_use] +const fn get_styles() -> clap::builder::Styles { + clap::builder::Styles::styled() + .usage( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), + ) + .header( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), + ) + .literal(Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green)))) + .invalid( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Red))), + ) + .error( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Red))), + ) + .valid( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Green))), + ) + .placeholder(Style::new().fg_color(Some(Color::Ansi(AnsiColor::White)))) +} + +fn collect_paths(paths_file: Option<&str>, paths: Option>) -> Result> { if let Some(paths_file) = paths_file { return Ok(fs::read_to_string(paths_file) - .with_context(|| format!("Failed to read paths file {}", paths_file))? + .with_context(|| format!("Failed to read paths file {paths_file}"))? .trim() .lines() .map(String::from) @@ -642,25 +989,23 @@ fn collect_paths<'a>( let mut incorporate_path = |path: &str, positive| { if positive { result.push(path.to_string()); - } else { - if let Some(index) = result.iter().position(|p| p == path) { - result.remove(index); - } + } else if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); } }; for mut path in paths { let mut positive = true; - if path.starts_with("!") { + if path.starts_with('!') { positive = false; - path = path.trim_start_matches("!"); + path = path.trim_start_matches('!').to_string(); } - if Path::new(path).exists() { - incorporate_path(path, positive); + if Path::new(&path).exists() { + incorporate_path(&path, positive); } else { let paths = - glob(path).with_context(|| format!("Invalid glob pattern {:?}", path))?; + glob(&path).with_context(|| format!("Invalid glob pattern {path:?}"))?; for path in paths { if let Some(path) = path?.to_str() { incorporate_path(path, positive); diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 3e28e51..0624382 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -1,11 +1,15 @@ -use super::util; +use std::{ + fmt, fs, + io::{self, Write}, + path::Path, + sync::atomic::AtomicUsize, + time::{Duration, Instant}, +}; + use anyhow::{anyhow, Context, Result}; -use std::io::{self, Write}; -use std::path::Path; -use std::sync::atomic::AtomicUsize; -use std::time::Instant; -use std::{fmt, fs, usize}; -use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; +use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree}; + +use super::util; #[derive(Debug)] pub struct Edit { @@ -18,19 +22,30 @@ pub struct Edit { pub struct Stats { pub successful_parses: usize, pub total_parses: usize, + pub total_bytes: usize, + pub total_duration: Duration, } impl fmt::Display for Stats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", - self.total_parses, - self.successful_parses, - self.total_parses - self.successful_parses, - (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + let duration_us = 
self.total_duration.as_micros(); + writeln!( + f, + "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%; average speed: {} bytes/ms", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0, + if duration_us != 0 { + ((self.total_bytes as u128) * 1_000) / duration_us + } else { + 0 + } + ) } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] pub enum ParseOutput { Normal, Quiet, @@ -38,71 +53,105 @@ pub enum ParseOutput { Dot, } -pub fn parse_file_at_path( - language: Language, - path: &Path, - edits: &Vec<&str>, - max_path_length: usize, - output: ParseOutput, - print_time: bool, - timeout: u64, - debug: bool, - debug_graph: bool, - cancellation_flag: Option<&AtomicUsize>, -) -> Result { +pub struct ParseFileOptions<'a> { + pub language: Language, + pub path: &'a Path, + pub edits: &'a [&'a str], + pub max_path_length: usize, + pub output: ParseOutput, + pub print_time: bool, + pub timeout: u64, + pub debug: bool, + pub debug_graph: bool, + pub cancellation_flag: Option<&'a AtomicUsize>, + pub encoding: Option, + pub open_log: bool, +} + +#[derive(Copy, Clone)] +pub struct ParseResult { + pub successful: bool, + pub bytes: usize, + pub duration: Option, +} + +pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Result { let mut _log_session = None; - let mut parser = Parser::new(); - parser.set_language(language)?; - let mut source_code = - fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?; + parser.set_language(&opts.language)?; + let mut source_code = fs::read(opts.path) + .with_context(|| format!("Error reading source file {:?}", opts.path))?; // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. - unsafe { parser.set_cancellation_flag(cancellation_flag) }; + unsafe { parser.set_cancellation_flag(opts.cancellation_flag) }; // Set a timeout based on the `--time` flag. 
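// (The value comes from the `--timeout` flag parsed above; the default of 0
// disables the timeout entirely in `set_timeout_micros`.)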
- parser.set_timeout_micros(timeout); + parser.set_timeout_micros(opts.timeout); // Render an HTML graph if `--debug-graph` was passed - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); + if opts.debug_graph { + _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); } // Log to stderr if `--debug` was passed - else if debug { + else if opts.debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); + io::stderr().write_all(b" ").unwrap(); } - write!(&mut io::stderr(), "{}\n", message).unwrap(); + writeln!(&mut io::stderr(), "{message}").unwrap(); }))); } let time = Instant::now(); - let tree = parser.parse(&source_code, None); + + #[inline(always)] + fn is_utf16_bom(bom_bytes: &[u8]) -> bool { + bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF] + } + + let tree = match opts.encoding { + Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::>(); + parser.parse_utf16(&source_code_utf16, None) + } + None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::>(); + parser.parse_utf16(&source_code_utf16, None) + } + _ => parser.parse(&source_code, None), + }; + + parser.stop_printing_dot_graphs(); let stdout = io::stdout(); let mut stdout = stdout.lock(); if let Some(mut tree) = tree { - if debug_graph && !edits.is_empty() { + if opts.debug_graph && !opts.edits.is_empty() { println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); } - for (i, edit) in edits.iter().enumerate() { + for (i, edit) in opts.edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; - perform_edit(&mut tree, &mut source_code, &edit); + perform_edit(&mut tree, &mut source_code, &edit)?; tree = parser.parse(&source_code, Some(&tree)).unwrap(); - if debug_graph { - println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + if opts.debug_graph { + println!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code)); } } let duration = time.elapsed(); - let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + let duration_ms = duration.as_micros() as f64 / 1e3; let mut cursor = tree.walk(); - if matches!(output, ParseOutput::Normal) { + if opts.output == ParseOutput::Normal { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -111,7 +160,7 @@ pub fn parse_file_at_path( let is_named = node.is_named(); if did_visit_children { if is_named { - stdout.write(b")")?; + stdout.write_all(b")")?; needs_newline = true; } if cursor.goto_next_sibling() { @@ -125,15 +174,15 @@ pub fn parse_file_at_path( } else { if is_named { if needs_newline { - stdout.write(b"\n")?; + stdout.write_all(b"\n")?; } for _ in 0..indent_level { - stdout.write(b" ")?; + stdout.write_all(b" ")?; } let start = node.start_position(); let end = node.end_position(); if let Some(field_name) = cursor.field_name() { - write!(&mut stdout, "{}: ", field_name)?; + write!(&mut stdout, "{field_name}: ")?; } write!( &mut stdout, @@ -155,49 +204,77 @@ pub fn parse_file_at_path( } } cursor.reset(tree.root_node()); - println!(""); + println!(); } - if matches!(output, ParseOutput::Xml) { + if opts.output == ParseOutput::Xml { let mut needs_newline = false; let mut 
indent_level = 0; let mut did_visit_children = false; - let mut tags: Vec<&str> = Vec::new(); + let mut had_named_children = false; + let mut tags = Vec::<&str>::new(); + writeln!(&mut stdout, "")?; loop { let node = cursor.node(); let is_named = node.is_named(); if did_visit_children { if is_named { let tag = tags.pop(); - write!(&mut stdout, "\n", tag.expect("there is a tag"))?; + if had_named_children { + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } + write!(&mut stdout, "", tag.expect("there is a tag"))?; + // we only write a line in the case where it's the last sibling + if let Some(parent) = node.parent() { + if parent.child(parent.child_count() - 1).unwrap() == node { + stdout.write_all(b"\n")?; + } + } needs_newline = true; } if cursor.goto_next_sibling() { did_visit_children = false; + had_named_children = false; } else if cursor.goto_parent() { did_visit_children = true; + had_named_children = is_named; indent_level -= 1; + if !is_named && needs_newline { + stdout.write_all(b"\n")?; + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } } else { break; } } else { if is_named { if needs_newline { - stdout.write(b"\n")?; + stdout.write_all(b"\n")?; } for _ in 0..indent_level { - stdout.write(b" ")?; + stdout.write_all(b" ")?; } write!(&mut stdout, "<{}", node.kind())?; if let Some(field_name) = cursor.field_name() { - write!(&mut stdout, " type=\"{}\"", field_name)?; + write!(&mut stdout, " field=\"{field_name}\"")?; } + let start = node.start_position(); + let end = node.end_position(); + write!(&mut stdout, " srow=\"{}\"", start.row)?; + write!(&mut stdout, " scol=\"{}\"", start.column)?; + write!(&mut stdout, " erow=\"{}\"", end.row)?; + write!(&mut stdout, " ecol=\"{}\"", end.column)?; write!(&mut stdout, ">")?; tags.push(node.kind()); needs_newline = true; } if cursor.goto_first_child() { did_visit_children = false; + had_named_children = false; indent_level += 1; } else { did_visit_children = true; @@ -205,16 +282,27 @@ pub fn parse_file_at_path( let end = node.end_byte(); let value = std::str::from_utf8(&source_code[start..end]).expect("has a string"); + // if !is_named { + // for _ in 0..indent_level { + // stdout.write_all(b" ")?; + // } + // } + if !is_named && needs_newline { + stdout.write_all(b"\n")?; + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } write!(&mut stdout, "{}", html_escape::encode_text(value))?; } } } cursor.reset(tree.root_node()); - println!(""); + println!(); } - if matches!(output, ParseOutput::Dot) { - util::print_tree_graph(&tree, "log.html").unwrap(); + if opts.output == ParseOutput::Dot { + util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap(); } let mut first_error = None; @@ -224,23 +312,22 @@ pub fn parse_file_at_path( if node.is_error() || node.is_missing() { first_error = Some(node); break; - } else { - if !cursor.goto_first_child() { - break; - } + } + if !cursor.goto_first_child() { + break; } } else if !cursor.goto_next_sibling() { break; } } - if first_error.is_some() || print_time { + if first_error.is_some() || opts.print_time { write!( &mut stdout, - "{:width$}\t{} ms", - path.to_str().unwrap(), - duration_ms, - width = max_path_length + "{:width$}\t{duration_ms:>7.2} ms\t{:>6} bytes/ms", + opts.path.to_str().unwrap(), + (source_code.len() as u128 * 1_000_000) / duration.as_nanos(), + width = opts.max_path_length )?; if let Some(node) = first_error { let start = node.start_position(); @@ -253,7 +340,7 @@ pub fn parse_file_at_path( write!( &mut stdout, "MISSING \"{}\"", - 
node.kind().replace("\n", "\\n") + node.kind().replace('\n', "\\n") )?; } } else { @@ -265,33 +352,42 @@ pub fn parse_file_at_path( start.row, start.column, end.row, end.column )?; } - write!(&mut stdout, "\n")?; + writeln!(&mut stdout)?; } - return Ok(first_error.is_some()); - } else if print_time { + return Ok(ParseResult { + successful: first_error.is_none(), + bytes: source_code.len(), + duration: Some(duration), + }); + } + + if opts.print_time { let duration = time.elapsed(); - let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + let duration_ms = duration.as_micros() as f64 / 1e3; writeln!( &mut stdout, - "{:width$}\t{} ms (timed out)", - path.to_str().unwrap(), - duration_ms, - width = max_path_length + "{:width$}\t{duration_ms:>7.2} ms\t(timed out)", + opts.path.to_str().unwrap(), + width = opts.max_path_length )?; } - Ok(false) + Ok(ParseResult { + successful: false, + bytes: source_code.len(), + duration: None, + }) } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputEdit { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); - input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); + let start_position = position_for_offset(input, start_byte)?; + let old_end_position = position_for_offset(input, old_end_byte)?; + input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied()); + let new_end_position = position_for_offset(input, new_end_byte)?; let edit = InputEdit { start_byte, old_end_byte, @@ -301,10 +397,10 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputE new_end_position, }; tree.edit(&edit); - edit + Ok(edit) } -fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { +fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result { let error = || { anyhow!(concat!( "Invalid edit string '{}'. ", @@ -316,7 +412,7 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { // * edit position // * deleted length // * inserted text - let mut parts = flag.split(" "); + let mut parts = flag.split(' '); let position = parts.next().ok_or_else(error)?; let deleted_length = parts.next().ok_or_else(error)?; let inserted_text = parts.collect::>().join(" ").into_bytes(); @@ -324,19 +420,19 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { // Position can either be a byte_offset or row,column pair, separated by a comma let position = if position == "$" { source_code.len() - } else if position.contains(",") { - let mut parts = position.split(","); + } else if position.contains(',') { + let mut parts = position.split(','); let row = parts.next().ok_or_else(error)?; - let row = usize::from_str_radix(row, 10).map_err(|_| error())?; + let row = row.parse::().map_err(|_| error())?; let column = parts.next().ok_or_else(error)?; - let column = usize::from_str_radix(column, 10).map_err(|_| error())?; - offset_for_position(source_code, Point { row, column }) + let column = column.parse::().map_err(|_| error())?; + offset_for_position(source_code, Point { row, column })? } else { - usize::from_str_radix(position, 10).map_err(|_| error())? + position.parse::().map_err(|_| error())? 
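// Examples of edit strings accepted by this parsing logic: "10 5 foo"
// (replace 5 bytes at byte offset 10 with "foo"), "1,0 0 bar" (insert "bar"
// at row 1, column 0), and "$ 0 baz" (append "baz" at the end of the file).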
}; // Deleted length must be a byte count. - let deleted_length = usize::from_str_radix(deleted_length, 10).map_err(|_| error())?; + let deleted_length = deleted_length.parse::().map_err(|_| error())?; Ok(Edit { position, @@ -345,31 +441,48 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { }) } -fn offset_for_position(input: &Vec, position: Point) -> usize { - let mut current_position = Point { row: 0, column: 0 }; - for (i, c) in input.iter().enumerate() { - if *c as char == '\n' { - current_position.row += 1; - current_position.column = 0; - } else { - current_position.column += 1; - } - if current_position > position { - return i; +pub fn offset_for_position(input: &[u8], position: Point) -> Result { + let mut row = 0; + let mut offset = 0; + let mut iter = memchr::memchr_iter(b'\n', input); + loop { + if let Some(pos) = iter.next() { + if row < position.row { + row += 1; + offset = pos; + continue; + } } + offset += 1; + break; + } + if position.row - row > 0 { + return Err(anyhow!("Failed to address a row: {}", position.row)); } - return input.len(); + if let Some(pos) = iter.next() { + if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { + return Err(anyhow!("Failed to address a column: {}", position.column)); + }; + } else if input.len() - offset < position.column { + return Err(anyhow!("Failed to address a column over the end")); + } + Ok(offset + position.column) } -fn position_for_offset(input: &Vec, offset: usize) -> Point { +pub fn position_for_offset(input: &[u8], offset: usize) -> Result { + if offset > input.len() { + return Err(anyhow!("Failed to address an offset: {offset}")); + } let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } + let mut last = 0; + for pos in memchr::memchr_iter(b'\n', &input[..offset]) { + result.row += 1; + last = pos; } - result + result.column = if result.row > 0 { + offset - last - 1 + } else { + offset + }; + Ok(result) } diff --git a/cli/src/playground.html b/cli/src/playground.html index 22c874d..420cd28 100644 --- a/cli/src/playground.html +++ b/cli/src/playground.html @@ -3,8 +3,8 @@ tree-sitter THE_LANGUAGE_NAME - - + + @@ -29,6 +29,10 @@ +
+ (?) +
+ diff --git a/cli/src/playground.rs b/cli/src/playground.rs index a2dbef9..12348b4 100644 --- a/cli/src/playground.rs +++ b/cli/src/playground.rs @@ -1,5 +1,3 @@ -use super::wasm; -use anyhow::Context; use std::{ borrow::Cow, env, fs, @@ -7,13 +5,16 @@ use std::{ path::{Path, PathBuf}, str::{self, FromStr as _}, }; + +use anyhow::{anyhow, Context, Result}; use tiny_http::{Header, Response, Server}; -use webbrowser; + +use super::wasm; macro_rules! optional_resource { - ($name: tt, $path: tt) => { + ($name:tt, $path:tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -22,7 +23,7 @@ macro_rules! optional_resource { } #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -36,44 +37,32 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js"); optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -fn get_main_html(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { - if let Some(tree_sitter_dir) = tree_sitter_dir { - Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) - } else { - Cow::Borrowed(include_bytes!("playground.html")) - } +fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { + tree_sitter_dir.map_or( + Cow::Borrowed(include_bytes!("playground.html")), + |tree_sitter_dir| { + Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) + }, + ) } -pub fn serve(grammar_path: &Path, open_in_browser: bool) { - let server = get_server(); - let grammar_name = wasm::get_grammar_name(&grammar_path.join("src")) - .with_context(|| "Failed to get wasm filename") - .unwrap(); - let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name); - let language_wasm = fs::read(grammar_path.join(&wasm_filename)) - .with_context(|| { - format!( - "Failed to read {}. 
Run `tree-sitter build-wasm` first.", - wasm_filename - ) - }) - .unwrap(); +pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> { + let server = get_server()?; + let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?; let url = format!("http://{}", server.server_addr()); - println!("Started playground on: {}", url); - if open_in_browser { - if let Err(_) = webbrowser::open(&url) { - eprintln!("Failed to open '{}' in a web browser", url); - } + println!("Started playground on: {url}"); + if open_in_browser && webbrowser::open(&url).is_err() { + eprintln!("Failed to open '{url}' in a web browser"); } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir)) + let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref())) .unwrap() .replace("THE_LANGUAGE_NAME", &grammar_name) .into_bytes(); - let playground_js = get_playground_js(&tree_sitter_dir); - let lib_js = get_lib_js(&tree_sitter_dir); - let lib_wasm = get_lib_wasm(&tree_sitter_dir); + let playground_js = get_playground_js(tree_sitter_dir.as_deref()); + let lib_js = get_lib_js(tree_sitter_dir.as_deref()); + let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref()); let html_header = Header::from_str("Content-Type: text/html").unwrap(); let js_header = Header::from_str("Content-Type: application/javascript").unwrap(); @@ -106,11 +95,15 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) { } _ => response(b"Not found", &html_header).with_status_code(404), }; - request.respond(res).expect("Failed to write HTTP response"); + request + .respond(res) + .with_context(|| "Failed to write HTTP response")?; } + + Ok(()) } -fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> { +fn redirect(url: &str) -> Response<&[u8]> { Response::empty(302) .with_data("".as_bytes(), Some(0)) .with_header(Header::from_bytes("Location", url.as_bytes()).unwrap()) @@ -122,18 +115,24 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> { .with_header(header.clone()) } -fn get_server() -> Server { - let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()); +fn get_server() -> Result<Server> { + let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned()); let port = env::var("TREE_SITTER_PLAYGROUND_PORT") - .map(|v| v.parse::<u16>().expect("Invalid port specification")) + .map(|v| { + v.parse::<u16>() + .with_context(|| "Invalid port specification") + }) .ok(); let listener = match port { - Some(port) => bind_to(&*addr, port).expect("Can't bind to the specified port"), - None => { - get_listener_on_available_port(&*addr).expect("Can't find a free port to bind to it") + Some(port) => { + bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")?
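// The get_server rewrite in this hunk keeps the playground's bind address and
// port configurable through the TREE_SITTER_PLAYGROUND_ADDR and
// TREE_SITTER_PLAYGROUND_PORT environment variables (defaults: 127.0.0.1 and
// an OS-assigned free port), but invalid or unbindable values now surface as
// Result errors instead of panics. A hypothetical invocation (the exact
// command line is an assumption, not part of this patch):
//
//   TREE_SITTER_PLAYGROUND_ADDR=0.0.0.0 TREE_SITTER_PLAYGROUND_PORT=8000 \
//   tree-sitter playground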
} + None => get_listener_on_available_port(&addr) + .with_context(|| "Failed to find a free port to bind to it")?, }; - Server::from_listener(listener, None).expect("Failed to start web server") + let server = + Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?; + Ok(server) } fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> { diff --git a/cli/src/query.rs b/cli/src/query.rs index fc24cb0..bffa058 100644 --- a/cli/src/query.rs +++ b/cli/src/query.rs @@ -1,5 +1,3 @@ -use crate::query_testing; -use anyhow::{Context, Result}; use std::{ fs, io::{self, Write}, @@ -7,10 +5,15 @@ use std::{ path::Path, time::Instant, }; + +use anyhow::{Context, Result}; use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; +use crate::query_testing; + +#[allow(clippy::too_many_arguments)] pub fn query_files_at_paths( - language: Language, + language: &Language, paths: Vec<String>, query_path: &Path, ordered_captures: bool, @@ -24,7 +27,7 @@ pub fn query_files_at_paths( let mut stdout = stdout.lock(); let query_source = fs::read_to_string(query_path) - .with_context(|| format!("Error reading query file {:?}", query_path))?; + .with_context(|| format!("Error reading query file {query_path:?}"))?; let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?; let mut query_cursor = QueryCursor::new(); @@ -41,10 +44,10 @@ pub fn query_files_at_paths( for path in paths { let mut results = Vec::new(); - writeln!(&mut stdout, "{}", path)?; + writeln!(&mut stdout, "{path}")?; let source_code = - fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?; + fs::read(&path).with_context(|| format!("Error reading source file {path:?}"))?; let tree = parser.parse(&source_code, None).unwrap(); let start = Instant::now(); @@ -57,17 +60,16 @@ pub fn query_files_at_paths( if !quiet { writeln!( &mut stdout, - " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", + " pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`", mat.pattern_index, capture.index, - capture_name, capture.node.start_position(), capture.node.end_position(), capture.node.utf8_text(&source_code).unwrap_or("") )?; } results.push(query_testing::CaptureInfo { - name: capture_name.to_string(), + name: (*capture_name).to_string(), start: capture.node.start_position(), end: capture.node.end_position(), }); @@ -85,23 +87,19 @@ pub fn query_files_at_paths( if end.row == start.row { writeln!( &mut stdout, - " capture: {} - {}, start: {}, end: {}, text: `{}`", + " capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`", capture.index, - capture_name, - start, - end, capture.node.utf8_text(&source_code).unwrap_or("") )?; } else { writeln!( &mut stdout, - " capture: {}, start: {}, end: {}", - capture_name, start, end, + " capture: {capture_name}, start: {start}, end: {end}", )?; } } results.push(query_testing::CaptureInfo { - name: capture_name.to_string(), + name: (*capture_name).to_string(), start: capture.node.start_position(), end: capture.node.end_position(), }); @@ -115,7 +113,7 @@ pub fn query_files_at_paths( )?; } if should_test { - query_testing::assert_expected_captures(results, path, &mut parser, language)?
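// Context for the call-site change here: in `--test` mode, each capture
// produced by the query run above is checked against assertion comments in
// the file itself. assert_expected_captures (reworked in query_testing.rs
// below) matches comments of the form `// <- capture.name` and
// `// ^ capture.name` against capture positions and fails on a mismatch.
// A hypothetical query test file (the capture name is illustrative only):
//
//   var x = 1;
//   //  ^ variable.name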
+ query_testing::assert_expected_captures(&results, path, &mut parser, language)?; } if print_time { writeln!(&mut stdout, "{:?}", start.elapsed())?; diff --git a/cli/src/query_testing.rs b/cli/src/query_testing.rs index 9950f12..cdf2e98 100644 --- a/cli/src/query_testing.rs +++ b/cli/src/query_testing.rs @@ -1,7 +1,8 @@ +use std::fs; + use anyhow::{anyhow, Result}; use lazy_static::lazy_static; use regex::Regex; -use std::fs; use tree_sitter::{Language, Parser, Point}; lazy_static! { @@ -18,15 +19,27 @@ pub struct CaptureInfo { #[derive(Debug, PartialEq, Eq)] pub struct Assertion { pub position: Point, + pub negative: bool, pub expected_capture_name: String, } +impl Assertion { + #[must_use] + pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self { + Self { + position: Point::new(row, col), + negative, + expected_capture_name, + } + } +} + /// Parse the given source code, finding all of the comments that contain /// highlighting assertions. Return a vector of (position, expected highlight name) /// pairs. pub fn parse_position_comments( parser: &mut Parser, - language: Language, + language: &Language, source: &[u8], ) -> Result<Vec<Assertion>> { let mut result = Vec::new(); @@ -45,7 +58,7 @@ pub fn parse_position_comments( let node = cursor.node(); // Find every comment node. - if node.kind().contains("comment") { + if node.kind().to_lowercase().contains("comment") { if let Ok(text) = node.utf8_text(source) { let mut position = node.start_position(); if position.row > 0 { @@ -54,6 +67,7 @@ pub fn parse_position_comments( // to its own column. let mut has_left_caret = false; let mut has_arrow = false; + let mut negative = false; let mut arrow_end = 0; for (i, c) in text.char_indices() { arrow_end = i + 1; @@ -69,6 +83,19 @@ pub fn parse_position_comments( has_left_caret = c == '<'; } + // find any ! after arrows but before capture name + if has_arrow { + for (i, c) in text[arrow_end..].char_indices() { + if c == '!' { + negative = true; + arrow_end += i + 1; + break; + } else if !c.is_whitespace() { + break; + } + } + } + // If the comment node contains an arrow and a highlight name, record the // highlight name and the position. if let (true, Some(mat)) = @@ -76,7 +103,8 @@ pub fn parse_position_comments( { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { - position: position, + position, + negative, expected_capture_name: mat.as_str().to_string(), }); } @@ -99,7 +127,7 @@ pub fn parse_position_comments( // code *above* the assertion. There can be multiple lines of assertion comments, // so the positions may have to be decremented by more than one row. let mut i = 0; - for assertion in result.iter_mut() { + for assertion in &mut result { loop { let on_assertion_line = assertion_ranges[i..] .iter() @@ -124,14 +152,14 @@ pub fn parse_position_comments( } pub fn assert_expected_captures( - infos: Vec<CaptureInfo>, + infos: &[CaptureInfo], path: String, parser: &mut Parser, - language: Language, + language: &Language, ) -> Result<()> { let contents = fs::read_to_string(path)?; let pairs = parse_position_comments(parser, language, contents.as_bytes())?; - for info in &infos { + for info in infos { if let Some(found) = pairs.iter().find(|p| { p.position.row == info.start.row && p.position >= info.start && p.position < info.end }) { @@ -141,7 +169,7 @@ pub fn assert_expected_captures( info.start, found.expected_capture_name, info.name - ))?
+ ))?; } } } diff --git a/cli/src/tags.rs b/cli/src/tags.rs index 457955d..4e2058c 100644 --- a/cli/src/tags.rs +++ b/cli/src/tags.rs @@ -1,14 +1,20 @@ -use super::util; +use std::{ + fs, + io::{self, Write}, + path::Path, + str, + time::Instant, +}; + use anyhow::{anyhow, Result}; -use std::io::{self, Write}; -use std::path::Path; -use std::time::Instant; -use std::{fs, str}; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; use tree_sitter_tags::TagsContext; +use super::util; + pub fn generate_tags( loader: &Loader, + loader_config: &Config, scope: Option<&str>, paths: &[String], quiet: bool, @@ -18,37 +24,37 @@ pub fn generate_tags( if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; if lang.is_none() { - return Err(anyhow!("Unknown scope '{}'", scope)); + return Err(anyhow!("Unknown scope '{scope}'")); } } let mut context = TagsContext::new(); - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); let stdout = io::stdout(); let mut stdout = stdout.lock(); for path in paths { let path = Path::new(&path); - let (language, language_config) = match lang { + let (language, language_config) = match lang.clone() { Some(v) => v, - None => match loader.language_configuration_for_file_name(path)? { - Some(v) => v, - None => { - eprintln!("No language found for path {:?}", path); + None => { + if let Some(v) = loader.language_configuration_for_file_name(path)? { + v + } else { + eprintln!("{}", util::lang_not_found_for_path(path, loader_config)); continue; } - }, + } }; if let Some(tags_config) = language_config.tags_config(language)? { - let indent; - if paths.len() > 1 { + let indent = if paths.len() > 1 { if !quiet { writeln!(&mut stdout, "{}", path.to_string_lossy())?; } - indent = "\t" + "\t" } else { - indent = ""; + "" }; let source = fs::read(path)?; @@ -61,8 +67,7 @@ pub fn generate_tags( if !quiet { write!( &mut stdout, - "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", - indent, + "{indent}{:<10}\t | {:<8}\t{} {} - {} `{}`", str::from_utf8(&source[tag.name_range]).unwrap_or(""), &tags_config.syntax_type_name(tag.syntax_type_id), if tag.is_definition { "def" } else { "ref" }, @@ -77,20 +82,15 @@ pub fn generate_tags( write!(&mut stdout, "\t{:?}", &docs)?; } } - writeln!(&mut stdout, "")?; + writeln!(&mut stdout)?; } } if time { - writeln!( - &mut stdout, - "{}time: {}ms", - indent, - t0.elapsed().as_millis(), - )?; + writeln!(&mut stdout, "{indent}time: {}ms", t0.elapsed().as_millis(),)?; } } else { - eprintln!("No tags config found for path {:?}", path); + eprintln!("No tags config found for path {path:?}"); } } diff --git a/cli/src/test.rs b/cli/src/test.rs index 69c4a66..f9a95ca 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -1,32 +1,48 @@ -use super::util; +use std::{ + collections::{BTreeMap, HashSet}, + ffi::OsStr, + fs, + io::{self, Write}, + path::{Path, PathBuf}, + str, +}; + use ansi_term::Colour; use anyhow::{anyhow, Context, Result}; use difference::{Changeset, Difference}; +use indoc::indoc; use lazy_static::lazy_static; -use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; -use regex::Regex; -use std::ffi::OsStr; -use std::fmt::Write as FmtWrite; -use std::fs; -use std::io::{self, Write}; -use std::path::{Path, PathBuf}; -use std::str; -use tree_sitter::{Language, LogType, Parser, Query}; +use regex::{ + bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}, + Regex, +}; +use tree_sitter::{format_sexp, Language, LogType, Parser, Query}; use 
walkdir::WalkDir; +use super::util; + lazy_static! { - static ref HEADER_REGEX: ByteRegex = - ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n") + static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new( + r"^(?x) + (?P<equals>(?:=+){3,}) + (?P<suffix1>[^=\r\n][^\r\n]*)? + \r?\n + (?P<test_name_and_markers>(?:[^=][^\r\n]*\r?\n)+) + ===+ + (?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n" + ) + .multi_line(true) + .build() + .unwrap(); + static ref DIVIDER_REGEX: ByteRegex = + ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n") .multi_line(true) .build() .unwrap(); - static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n") - .multi_line(true) - .build() - .unwrap(); static ref COMMENT_REGEX: Regex = Regex::new(r"(?m)^\s*;.*$").unwrap(); static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap(); + static ref POINT_REGEX: Regex = Regex::new(r"\s*\[\s*\d+\s*,\s*\d+\s*\]\s*").unwrap(); } #[derive(Debug, PartialEq, Eq)] @@ -40,13 +56,25 @@ pub enum TestEntry { name: String, input: Vec<u8>, output: String, + header_delim_len: usize, + divider_delim_len: usize, has_fields: bool, + attributes: TestAttributes, }, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TestAttributes { + pub skip: bool, + pub platform: bool, + pub fail_fast: bool, + pub error: bool, + pub languages: Vec<Box<str>>, +} + impl Default for TestEntry { fn default() -> Self { - TestEntry::Group { + Self::Group { name: String::new(), children: Vec::new(), file_path: None, @@ -54,82 +82,175 @@ impl Default for TestEntry { } } -pub fn run_tests_at_path( - language: Language, - path: &Path, - debug: bool, - debug_graph: bool, - filter: Option<&str>, - update: bool, -) -> Result<()> { - let test_entry = parse_tests(path)?; +impl Default for TestAttributes { + fn default() -> Self { + Self { + skip: false, + platform: true, + fail_fast: false, + error: false, + languages: vec!["".into()], + } + } +} + +pub struct TestOptions<'a> { + pub path: PathBuf, + pub debug: bool, + pub debug_graph: bool, + pub filter: Option<&'a str>, + pub include: Option<Regex>, + pub exclude: Option<Regex>, + pub update: bool, + pub open_log: bool, + pub languages: BTreeMap<&'a str, &'a Language>, + pub color: bool, + pub test_num: usize, +} + +pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<()> { + let test_entry = parse_tests(&opts.path)?; let mut _log_session = None; - let mut parser = Parser::new(); - parser.set_language(language)?; - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); - } else if debug { + if opts.debug_graph { + _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); + } else if opts.debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); + io::stderr().write_all(b" ").unwrap(); } - write!(&mut io::stderr(), "{}\n", message).unwrap(); + writeln!(&mut io::stderr(), "{message}").unwrap(); }))); } let mut failures = Vec::new(); let mut corrected_entries = Vec::new(); + let mut has_parse_errors = false; run_tests( - &mut parser, + parser, test_entry, - filter, + opts, 0, &mut failures, - update, &mut corrected_entries, + &mut has_parse_errors, )?; - if failures.len() > 0 { - println!(""); + parser.stop_printing_dot_graphs(); + + if failures.is_empty() { + Ok(()) + } else { + println!(); - if update { + if opts.update && !has_parse_errors { if failures.len() == 1 { -
println!("1 update:\n") + println!("1 update:\n"); } else { - println!("{} updates:\n", failures.len()) + println!("{} updates:\n", failures.len()); } for (i, (name, ..)) in failures.iter().enumerate() { - println!(" {}. {}", i + 1, name); + println!(" {}. {name}", i + 1); } + Ok(()) } else { - if failures.len() == 1 { - println!("1 failure:") - } else { - println!("{} failures:", failures.len()) + has_parse_errors = opts.update && has_parse_errors; + + if !has_parse_errors { + if failures.len() == 1 { + println!("1 failure:"); + } else { + println!("{} failures:", failures.len()); + } } - print_diff_key(); + if opts.color { + print_diff_key(); + } for (i, (name, actual, expected)) in failures.iter().enumerate() { - println!("\n {}. {}:", i + 1, name); - let actual = format_sexp_indented(&actual, 2); - let expected = format_sexp_indented(&expected, 2); - print_diff(&actual, &expected); + println!("\n {}. {name}:", i + 1); + let actual = format_sexp(actual, 2); + let expected = format_sexp(expected, 2); + print_diff(&actual, &expected, opts.color); + } + + if has_parse_errors { + Err(anyhow!(indoc! {" + Some tests failed to parse with unexpected `ERROR` or `MISSING` nodes, as shown above, and cannot be updated automatically. + Either fix the grammar or manually update the tests if this is expected."})) + } else { + Err(anyhow!("")) } - Err(anyhow!("")) } - } else { - Ok(()) } } -pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { +#[allow(clippy::type_complexity)] +pub fn get_test_info<'test>( + test_entry: &'test TestEntry, + target_test: u32, + test_num: &mut u32, +) -> Option<(&'test str, &'test [u8], Vec>)> { + match test_entry { + TestEntry::Example { + name, + input, + attributes, + .. + } => { + if *test_num == target_test { + return Some((name, input, attributes.languages.clone())); + } else { + *test_num += 1; + } + } + TestEntry::Group { children, .. } => { + for child in children { + if let Some((name, input, languages)) = get_test_info(child, target_test, test_num) + { + return Some((name, input, languages)); + } + } + } + } + + None +} + +/// Writes the input of `target_test` to a temporary file and returns the path +pub fn get_tmp_test_file(target_test: u32, color: bool) -> Result<(PathBuf, Vec>)> { + let current_dir = std::env::current_dir().unwrap(); + let test_dir = current_dir.join("test").join("corpus"); + + // Get the input of the target test + let test_entry = parse_tests(&test_dir)?; + let mut test_num = 0; + let Some((test_name, test_contents, languages)) = + get_test_info(&test_entry, target_test - 1, &mut test_num) + else { + return Err(anyhow!("Failed to fetch contents of test #{target_test}")); + }; + + // Write the test contents to a temporary file + let test_path = std::env::temp_dir().join(".tree-sitter-test"); + let mut test_file = std::fs::File::create(&test_path)?; + test_file.write_all(test_contents)?; + + println!( + "{target_test}. 
{}\n", + opt_color(color, Colour::Green, test_name) + ); + + Ok((test_path, languages)) +} + +pub fn check_queries_at_path(language: &Language, path: &Path) -> Result<()> { if path.exists() { for entry in WalkDir::new(path) .into_iter() - .filter_map(|e| e.ok()) + .filter_map(std::result::Result::ok) .filter(|e| { e.file_type().is_file() && e.path().extension().and_then(OsStr::to_str) == Some("scm") @@ -138,9 +259,9 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { { let filepath = entry.file_name().to_str().unwrap_or(""); let content = fs::read_to_string(entry.path()) - .with_context(|| format!("Error reading query file {:?}", filepath))?; + .with_context(|| format!("Error reading query file {filepath:?}"))?; Query::new(language, &content) - .with_context(|| format!("Error in query file {:?}", filepath))?; + .with_context(|| format!("Error in query file {filepath:?}"))?; } } Ok(()) @@ -148,193 +269,320 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { pub fn print_diff_key() { println!( - "\n{} / {}", + "\ncorrect / {} / {}", Colour::Green.paint("expected"), - Colour::Red.paint("actual") + Colour::Red.paint("unexpected") ); } -pub fn print_diff(actual: &String, expected: &String) { +pub fn print_diff(actual: &str, expected: &str, use_color: bool) { let changeset = Changeset::new(actual, expected, "\n"); for diff in &changeset.diffs { match diff { Difference::Same(part) => { - print!("{}{}", part, changeset.split); + if use_color { + print!("{part}{}", changeset.split); + } else { + print!("correct:\n{part}{}", changeset.split); + } } Difference::Add(part) => { - print!("{}{}", Colour::Green.paint(part), changeset.split); + if use_color { + print!("{}{}", Colour::Green.paint(part), changeset.split); + } else { + print!("expected:\n{part}{}", changeset.split); + } } Difference::Rem(part) => { - print!("{}{}", Colour::Red.paint(part), changeset.split); + if use_color { + print!("{}{}", Colour::Red.paint(part), changeset.split); + } else { + print!("unexpected:\n{part}{}", changeset.split); + } } } } - println!(""); + println!(); +} + +pub fn opt_color(use_color: bool, color: ansi_term::Colour, text: &str) -> String { + if use_color { + color.paint(text).to_string() + } else { + text.to_string() + } } +#[allow(clippy::too_many_arguments)] fn run_tests( parser: &mut Parser, test_entry: TestEntry, - filter: Option<&str>, + opts: &mut TestOptions, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, - update: bool, - corrected_entries: &mut Vec<(String, String, String)>, -) -> Result<()> { + corrected_entries: &mut Vec<(String, String, String, usize, usize)>, + has_parse_errors: &mut bool, +) -> Result { match test_entry { TestEntry::Example { name, input, output, + header_delim_len, + divider_delim_len, has_fields, + attributes, } => { - if let Some(filter) = filter { - if !name.contains(filter) { - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&output); - corrected_entries.push((name, input, output)); - } - return Ok(()); - } + print!("{}", " ".repeat(indent_level as usize)); + + if attributes.skip { + println!( + "{:>3}.  {}", + opts.test_num, + opt_color(opts.color, Colour::Yellow, &name), + ); + return Ok(true); } - let tree = parser.parse(&input, None).unwrap(); - let mut actual = tree.root_node().to_sexp(); - if !has_fields { - actual = strip_sexp_fields(actual); - } - for _ in 0..indent_level { - print!(" "); + + if !attributes.platform { + println!( + "{:>3}. 
 {}", + opts.test_num, + opt_color(opts.color, Colour::Purple, &name) + ); + return Ok(true); } - if actual == output { - println!("✓ {}", Colour::Green.paint(&name)); - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&output); - corrected_entries.push((name, input, output)); + + for (i, language_name) in attributes.languages.iter().enumerate() { + if !language_name.is_empty() { + let language = opts + .languages + .get(language_name.as_ref()) + .ok_or_else(|| anyhow!("Language not found: {language_name}"))?; + parser.set_language(language)?; } - } else { - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&actual); - corrected_entries.push((name.clone(), input, output)); - println!("✓ {}", Colour::Blue.paint(&name)); + let tree = parser.parse(&input, None).unwrap(); + + if attributes.error { + if tree.root_node().has_error() { + println!( + "{:>3}.  {}", + opts.test_num, + opt_color(opts.color, Colour::Green, &name) + ); + } else { + println!( + "{:>3}.  {}", + opts.test_num, + opt_color(opts.color, Colour::Red, &name) + ); + } + + if attributes.fail_fast { + return Ok(false); + } } else { - println!("✗ {}", Colour::Red.paint(&name)); + let mut actual = tree.root_node().to_sexp(); + if !has_fields { + actual = strip_sexp_fields(&actual); + } + + if actual == output { + println!( + "{:>3}. ✓ {}", + opts.test_num, + opt_color(opts.color, Colour::Green, &name), + ); + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + let output = format_sexp(&output, 0); + corrected_entries.push(( + name.clone(), + input, + output, + header_delim_len, + divider_delim_len, + )); + } + } else { + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + let expected_output = format_sexp(&output, 0); + let actual_output = format_sexp(&actual, 0); + + // Only bail early before updating if the actual is not the output, + // sometimes users want to test cases that + // are intended to have errors, hence why this + // check isn't shown above + if actual.contains("ERROR") || actual.contains("MISSING") { + *has_parse_errors = true; + + // keep the original `expected` output if the actual output has an + // error + corrected_entries.push(( + name.clone(), + input, + expected_output, + header_delim_len, + divider_delim_len, + )); + } else { + corrected_entries.push(( + name.clone(), + input, + actual_output, + header_delim_len, + divider_delim_len, + )); + println!( + "{:>3}. ✓ {}", + opts.test_num, + opt_color(opts.color, Colour::Blue, &name) + ); + } + } else { + println!( + "{:>3}. ✗ {}", + opts.test_num, + opt_color(opts.color, Colour::Red, &name) + ); + } + failures.push((name.clone(), actual, output.clone())); + + if attributes.fail_fast { + // return value of false means to fail fast + return Ok(false); + } + + if i == attributes.languages.len() - 1 { + // reset back to first language + parser.set_language(opts.languages.values().next().unwrap())?; + } + } } - failures.push((name, actual, output)); } + opts.test_num += 1; } TestEntry::Group { name, - children, + mut children, file_path, } => { - if indent_level > 0 { - for _ in 0..indent_level { - print!(" "); + // track which tests are being skipped to maintain consistent numbering while using + // filters + let mut skipped_tests = HashSet::new(); + let mut advance_counter = opts.test_num; + children.retain(|child| match child { + TestEntry::Example { name, .. 
} => { + if let Some(filter) = opts.filter { + if !name.contains(filter) { + skipped_tests.insert(advance_counter); + advance_counter += 1; + return false; + } + } + if let Some(include) = &opts.include { + if !include.is_match(name) { + skipped_tests.insert(advance_counter); + advance_counter += 1; + return false; + } + } + if let Some(exclude) = &opts.exclude { + if exclude.is_match(name) { + skipped_tests.insert(advance_counter); + advance_counter += 1; + return false; + } + } + advance_counter += 1; + true + } + TestEntry::Group { .. } => { + advance_counter += count_subtests(child); + true } - println!("{}:", name); + }); + + if children.is_empty() { + opts.test_num = advance_counter; + return Ok(true); + } + + if indent_level > 0 { + print!("{}", " ".repeat(indent_level as usize)); + println!("{name}:"); } let failure_count = failures.len(); indent_level += 1; for child in children { - run_tests( + if let TestEntry::Example { .. } = child { + while skipped_tests.remove(&opts.test_num) { + opts.test_num += 1; + } + } + if !run_tests( parser, child, - filter, + opts, indent_level, failures, - update, corrected_entries, - )?; + has_parse_errors, + )? { + // fail fast + return Ok(false); + } } + opts.test_num += skipped_tests.len(); + if let Some(file_path) = file_path { - if update && failures.len() - failure_count > 0 { + if opts.update && failures.len() - failure_count > 0 { write_tests(&file_path, corrected_entries)?; } corrected_entries.clear(); } } } - Ok(()) + Ok(true) } -fn format_sexp(sexp: &String) -> String { - format_sexp_indented(sexp, 0) -} - -fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { - let mut formatted = String::new(); - - let mut indent_level = initial_indent_level; - let mut has_field = false; - let mut s_iter = sexp.split(|c| c == ' ' || c == ')'); - while let Some(s) = s_iter.next() { - if s.is_empty() { - // ")" - indent_level -= 1; - write!(formatted, ")").unwrap(); - } else if s.starts_with('(') { - if has_field { - has_field = false; - } else { - if indent_level > 0 { - writeln!(formatted, "").unwrap(); - for _ in 0..indent_level { - write!(formatted, " ").unwrap(); - } - } - indent_level += 1; - } - - // "(node_name" - write!(formatted, "{}", s).unwrap(); - - // "(MISSING node_name" or "(UNEXPECTED 'x'" - if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") { - let s = s_iter.next().unwrap(); - write!(formatted, " {}", s).unwrap(); - } - } else if s.ends_with(':') { - // "field:" - writeln!(formatted, "").unwrap(); - for _ in 0..indent_level { - write!(formatted, " ").unwrap(); - } - write!(formatted, "{} ", s).unwrap(); - has_field = true; - indent_level += 1; - } +fn count_subtests(test_entry: &TestEntry) -> usize { + match test_entry { + TestEntry::Example { .. } => 1, + TestEntry::Group { children, .. 
} => children .iter() .fold(0, |count, child| count + count_subtests(child)), } - - formatted } -fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { +fn write_tests( + file_path: &Path, + corrected_entries: &[(String, String, String, usize, usize)], +) -> Result<()> { let mut buffer = fs::File::create(file_path)?; write_tests_to_buffer(&mut buffer, corrected_entries) } fn write_tests_to_buffer( buffer: &mut impl Write, - corrected_entries: &Vec<(String, String, String)>, + corrected_entries: &[(String, String, String, usize, usize)], ) -> Result<()> { - for (i, (name, input, output)) in corrected_entries.iter().enumerate() { + for (i, (name, input, output, header_delim_len, divider_delim_len)) in + corrected_entries.iter().enumerate() + { if i > 0 { - write!(buffer, "\n")?; + writeln!(buffer)?; } - write!( + writeln!( buffer, - "{}\n{}\n{}\n{}\n{}\n\n{}\n", - "=".repeat(80), - name, - "=".repeat(80), - input, - "-".repeat(80), + "{}\n{name}\n{}\n{input}\n{}\n\n{}", + "=".repeat(*header_delim_len), + "=".repeat(*header_delim_len), + "-".repeat(*divider_delim_len), output.trim() )?; } @@ -351,11 +599,20 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> { let mut children = Vec::new(); for entry in fs::read_dir(path)? { let entry = entry?; - let hidden = entry.file_name().to_str().unwrap_or("").starts_with("."); + let hidden = entry.file_name().to_str().unwrap_or("").starts_with('.'); if !hidden { - children.push(parse_tests(&entry.path())?); + children.push(entry.path()); } } + children.sort_by(|a, b| { + a.file_name() + .unwrap_or_default() + .cmp(b.file_name().unwrap_or_default()) + }); + let children = children + .iter() + .map(|path| parse_tests(path)) + .collect::<io::Result<Vec<TestEntry>>>()?; Ok(TestEntry::Group { name, children, @@ -363,15 +620,21 @@ }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content, Some(path.to_path_buf()))) + Ok(parse_test_content(name, &content, Some(path.to_path_buf()))) } } -pub fn strip_sexp_fields(sexp: String) -> String { - SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() +#[must_use] +pub fn strip_sexp_fields(sexp: &str) -> String { + SEXP_FIELD_REGEX.replace_all(sexp, " (").to_string() +} + +#[must_use] +pub fn strip_points(sexp: &str) -> String { + POINT_REGEX.replace_all(sexp, "").to_string() } -fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) -> TestEntry { +fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut prev_name = String::new(); @@ -388,25 +651,103 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) // Find all of the `===` test headers, which contain the test names. // Ignore any matches whose suffix does not match the first header // suffix in the file.
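// For orientation while reading parse_test_content below, a corpus test entry
// has this shape (a hypothetical example; the `===` and `---` delimiter
// lengths are what the new header_delim_len/divider_delim_len fields record,
// so that `--update` can rewrite a file without changing its delimiters):
//
//   ==================
//   Simple declaration
//   ==================
//   var x = 1;
//   ------------------
//
//   (program
//     (variable_declaration))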
- let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| { + let header_matches = HEADER_REGEX.captures_iter(bytes).filter_map(|c| { + let header_delim_len = c.name("equals").map_or(80, |m| m.as_bytes().len()); let suffix1 = c .name("suffix1") .map(|m| String::from_utf8_lossy(m.as_bytes())); let suffix2 = c .name("suffix2") .map(|m| String::from_utf8_lossy(m.as_bytes())); + + let (mut skip, mut platform, mut fail_fast, mut error, mut languages) = + (false, None, false, false, vec![]); + + let test_name_and_markers = c + .name("test_name_and_markers") + .map_or("".as_bytes(), |m| m.as_bytes()); + + let mut test_name = String::new(); + let mut seen_marker = false; + + for line in str::from_utf8(test_name_and_markers) + .unwrap() + .lines() + .filter(|s| !s.is_empty()) + { + match line.split('(').next().unwrap() { + ":skip" => (seen_marker, skip) = (true, true), + ":platform" => { + if let Some(platforms) = line.strip_prefix(':').and_then(|s| { + s.strip_prefix("platform(") + .and_then(|s| s.strip_suffix(')')) + }) { + seen_marker = true; + platform = Some( + platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS, + ); + } + } + ":fail-fast" => (seen_marker, fail_fast) = (true, true), + ":error" => (seen_marker, error) = (true, true), + ":language" => { + if let Some(lang) = line.strip_prefix(':').and_then(|s| { + s.strip_prefix("language(") + .and_then(|s| s.strip_suffix(')')) + }) { + seen_marker = true; + languages.push(lang.into()); + } + } + _ if !seen_marker => { + test_name.push_str(line); + test_name.push('\n'); + } + _ => {} + } + } + + // prefer skip over error, both shouldn't be set + if skip { + error = false; + } + + // add a default language if none are specified, will defer to the first language + if languages.is_empty() { + languages.push("".into()); + } + if suffix1 == first_suffix && suffix2 == first_suffix { let header_range = c.get(0).unwrap().range(); - let test_name = c - .name("test_name") - .map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string()); - Some((header_range, test_name)) + let test_name = if test_name.is_empty() { + None + } else { + Some(test_name.trim_end().to_string()) + }; + Some(( + header_delim_len, + header_range, + test_name, + TestAttributes { + skip, + platform: platform.unwrap_or(true), + fail_fast, + error, + languages, + }, + )) } else { None } }); - for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) { + let (mut prev_header_len, mut prev_attributes) = (80, TestAttributes::default()); + for (header_delim_len, header_range, test_name, attributes) in header_matches.chain(Some(( + 80, + bytes.len()..bytes.len(), + None, + TestAttributes::default(), + ))) { // Find the longest line of dashes following each test description. That line // separates the input from the expected output. Ignore any matches whose suffix // does not match the first suffix in the file. 
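// The closure above also recognizes attribute markers written on their own
// lines between the test name and the closing `===` row. A hypothetical
// header combining several markers:
//
//   =========================
//   Parses only on this OS
//   :platform(linux)
//   :language(c)
//   :fail-fast
//   =========================
//
// Per the code above, `:skip` suppresses `:error` when both are present, and
// when no `:language(...)` marker is given the list falls back to a single
// empty entry, which run_tests treats as "keep the first configured
// language".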
@@ -414,19 +755,23 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) let divider_range = DIVIDER_REGEX .captures_iter(&bytes[prev_header_end..header_range.start]) .filter_map(|m| { + let divider_delim_len = m.name("hyphens").map_or(80, |m| m.as_bytes().len()); let suffix = m .name("suffix") .map(|m| String::from_utf8_lossy(m.as_bytes())); if suffix == first_suffix { let range = m.get(0).unwrap().range(); - Some((prev_header_end + range.start)..(prev_header_end + range.end)) + Some(( + divider_delim_len, + (prev_header_end + range.start)..(prev_header_end + range.end), + )) } else { None } }) - .max_by_key(|range| range.len()); + .max_by_key(|(_, range)| range.len()); - if let Some(divider_range) = divider_range { + if let Some((divider_delim_len, divider_range)) = divider_range { if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) { let mut input = bytes[prev_header_end..divider_range.start].to_vec(); @@ -447,16 +792,23 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) // fields will not be checked. let has_fields = SEXP_FIELD_REGEX.is_match(&output); - children.push(TestEntry::Example { + let t = TestEntry::Example { name: prev_name, input, output, + header_delim_len: prev_header_len, + divider_delim_len, has_fields, - }); + attributes: prev_attributes, + }; + + children.push(t); } } } - prev_name = test_name.unwrap_or(String::new()); + prev_attributes = attributes; + prev_name = test_name.unwrap_or_default(); + prev_header_len = header_delim_len; prev_header_end = header_range.end; } TestEntry::Group { @@ -474,7 +826,7 @@ mod tests { fn test_parse_test_content_simple() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" =============== The first test =============== a b c --- (a (b c)) ================ The second test ================ d --- (d) - "# - .trim() - .to_string(), + " + .trim(), None, ); assert_eq!( entry, TestEntry::Group { name: "the-filename".to_string(), children: vec![ TestEntry::Example { name: "The first test".to_string(), - input: "\na b c\n".as_bytes().to_vec(), + input: b"\na b c\n".to_vec(), output: "(a (b c))".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "The second test".to_string(), - input: "d".as_bytes().to_vec(), + input: b"d".to_vec(), output: "(d)".to_string(), + header_delim_len: 16, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, ], file_path: None, @@ -525,7 +882,7 @@ fn test_parse_test_content_with_dashes_in_source_code() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" ================== Code with dashes ================== abc --- defg ---- hijkl ------------------- (a (b)) ========================= Code ending with dashes ========================= abc ----------- ------------------- (c (d)) - "# - .trim() - .to_string(), + " + .trim(), None, ); assert_eq!( entry, TestEntry::Group { name: "the-filename".to_string(), children: vec![ TestEntry::Example { name: "Code with dashes".to_string(), - input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(), + input: b"abc\n---\ndefg\n----\nhijkl".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 7, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "Code ending with dashes".to_string(), - input: "abc\n-----------".as_bytes().to_vec(), + input: b"abc\n-----------".to_vec(), output: "(c (d))".to_string(), + header_delim_len: 25, + divider_delim_len: 19, has_fields: false, + attributes: TestAttributes::default(), }, ], file_path: None, @@ -577,9 +939,10 @@ #[test] fn test_format_sexp() { +
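// format_sexp is no longer a private helper in this file: it is now imported
// from the tree_sitter crate (see the updated `use` block at the top of this
// file's diff) and takes an indent width as a second argument. The assertions
// below encode its behavior: each nested node moves to its own line, indented
// two spaces per level, with MISSING/UNEXPECTED leaves kept on one line.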
assert_eq!(format_sexp("", 0), ""); assert_eq!( - format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()), - r#" + format_sexp("(a b: (c) (d) e: (f (g (h (MISSING i)))))", 0), + r" (a b: (c) (d) @@ -587,18 +950,39 @@ abc (g (h (MISSING i))))) -"# +" + .trim() + ); + assert_eq!( + format_sexp("(program (ERROR (UNEXPECTED ' ')) (identifier))", 0), + r" +(program + (ERROR + (UNEXPECTED ' ')) + (identifier)) +" .trim() - .to_string() ); - assert_eq!(format_sexp(&"()".to_string()), "()".to_string()); assert_eq!( - format_sexp(&"(A (M (B)))".to_string()), - "(A\n (M\n (B)))" + format_sexp(r#"(source_file (MISSING ")"))"#, 0), + r#" +(source_file + (MISSING ")")) + "# + .trim() ); assert_eq!( - format_sexp(&"(A (U (B)))".to_string()), - "(A\n (U\n (B)))" + format_sexp( + r"(source_file (ERROR (UNEXPECTED 'f') (UNEXPECTED '+')))", + 0 + ), + r#" +(source_file + (ERROR + (UNEXPECTED 'f') + (UNEXPECTED '+'))) +"# + .trim() ); } @@ -610,17 +994,21 @@ abc "title 1".to_string(), "input 1".to_string(), "output 1".to_string(), + 80, + 80, ), ( "title 2".to_string(), "input 2".to_string(), "output 2".to_string(), + 80, + 80, ), ]; write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); assert_eq!( String::from_utf8(buffer).unwrap(), - r#" + r" ================================================================================ title 1 ================================================================================ @@ -636,7 +1024,7 @@ input 2 -------------------------------------------------------------------------------- output 2 -"# +" .trim_start() .to_string() ); @@ -663,7 +1051,7 @@ code --- ; Line start comment -(a +(a ; ignore this (b) ; also ignore this @@ -677,8 +1065,7 @@ code (MISSING ";") "# - .trim() - .to_string(), + .trim(), None, ); @@ -689,21 +1076,30 @@ code children: vec![ TestEntry::Example { name: "sexp with comment".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "sexp with comment between".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "sexp with ';'".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(MISSING \";\")".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), } ], file_path: None, @@ -715,7 +1111,7 @@ code fn test_parse_test_content_with_suffixes() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" ==================asdf\()[]|{}*+?^$.- First test ==================asdf\()[]|{}*+?^$.- @@ -754,17 +1150,15 @@ NOT A TEST HEADER ---asdf\()[]|{}*+?^$.- (a) - "# - .trim() - .to_string(), + " + .trim(), None, ); - let expected_input = "\n=========================\n\ + let expected_input = b"\n=========================\n\ NOT A TEST HEADER\n\ =========================\n\ -------------------------\n" - .as_bytes() .to_vec(); assert_eq!( entry, @@ -775,19 +1169,28 @@ NOT A TEST HEADER name: "First test".to_string(), input: expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "Second test".to_string(), input: 
expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "Test name with = symbol".to_string(), - input: expected_input.clone(), + input: expected_input, output: "(a)".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), } ], file_path: None, @@ -799,7 +1202,7 @@ NOT A TEST HEADER fn test_parse_test_content_with_newlines_in_test_names() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" =============== name with @@ -815,8 +1218,7 @@ name with === signs code with ---- --- (d) -"# - .to_string(), +", None, ); @@ -830,13 +1232,133 @@ code with ---- name: "name\nwith\nnewlines".to_string(), input: b"a".to_vec(), output: "(b)".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "name with === signs".to_string(), input: b"code with ----".to_vec(), output: "(d)".to_string(), + header_delim_len: 20, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes::default(), + } + ] + } + ); + } + + #[test] + fn test_parse_test_with_markers() { + // do one with :skip, we should not see it in the entry output + + let entry = parse_test_content( + "the-filename".to_string(), + r" +===================== +Test with skip marker +:skip +===================== +a +--- +(b) +", + None, + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + file_path: None, + children: vec![TestEntry::Example { + name: "Test with skip marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 21, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes { + skip: true, + platform: true, + fail_fast: false, + error: false, + languages: vec!["".into()] + }, + }] + } + ); + + let entry = parse_test_content( + "the-filename".to_string(), + &format!( + r" +========================= +Test with platform marker +:platform({}) +:fail-fast +========================= +a +--- +(b) + +============================= +Test with bad platform marker +:platform({}) +:language(foo) +============================= +a +--- +(b) +", + std::env::consts::OS, + if std::env::consts::OS == "linux" { + "macos" + } else { + "linux" + } + ), + None, + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + file_path: None, + children: vec![ + TestEntry::Example { + name: "Test with platform marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 25, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes { + skip: false, + platform: true, + fail_fast: true, + error: false, + languages: vec!["".into()] + }, + }, + TestEntry::Example { + name: "Test with bad platform marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 29, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes { + skip: false, + platform: false, + fail_fast: false, + error: false, + languages: vec!["foo".into()] + }, } ] } diff --git a/cli/src/test_highlight.rs b/cli/src/test_highlight.rs index 2d9d536..e8c8cb6 100644 --- a/cli/src/test_highlight.rs +++ b/cli/src/test_highlight.rs @@ -1,11 +1,16 @@ -use crate::query_testing::{parse_position_comments, Assertion}; +use std::{fs, path::Path}; + use ansi_term::Colour; use anyhow::{anyhow, 
Result}; -use std::fs; -use std::path::Path; use tree_sitter::Point; use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; + +use super::{ + query_testing::{parse_position_comments, Assertion}, + test::opt_color, + util, +}; #[derive(Debug)] pub struct Failure { @@ -31,49 +36,102 @@ impl std::fmt::Display for Failure { if i > 0 { write!(f, ", ")?; } - write!(f, "'{}'", actual_highlight)?; + write!(f, "'{actual_highlight}'")?; } } Ok(()) } } -pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { +pub fn test_highlights( + loader: &Loader, + loader_config: &Config, + highlighter: &mut Highlighter, + directory: &Path, + use_color: bool, +) -> Result<()> { + println!("syntax highlighting:"); + test_highlights_indented(loader, loader_config, highlighter, directory, use_color, 2) +} + +fn test_highlights_indented( + loader: &Loader, + loader_config: &Config, + highlighter: &mut Highlighter, + directory: &Path, + use_color: bool, + indent_level: usize, +) -> Result<()> { let mut failed = false; - let mut highlighter = Highlighter::new(); - println!("syntax highlighting:"); for highlight_test_file in fs::read_dir(directory)? { let highlight_test_file = highlight_test_file?; let test_file_path = highlight_test_file.path(); let test_file_name = highlight_test_file.file_name(); - let (language, language_config) = loader - .language_configuration_for_file_name(&test_file_path)? - .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; - let highlight_config = language_config - .highlight_config(language)? - .ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?; - match test_highlight( - &loader, - &mut highlighter, - highlight_config, - fs::read(&test_file_path)?.as_slice(), - ) { - Ok(assertion_count) => { - println!( - " ✓ {} ({} assertions)", - Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), - assertion_count - ); - } - Err(e) => { - println!( - " ✗ {}", - Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) - ); - println!(" {}", e); + print!( + "{indent:indent_level$}", + indent = "", + indent_level = indent_level * 2 + ); + if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() { + println!("{}:", test_file_name.into_string().unwrap()); + if test_highlights_indented( + loader, + loader_config, + highlighter, + &test_file_path, + use_color, + indent_level + 1, + ) + .is_err() + { failed = true; } + } else { + let (language, language_config) = loader + .language_configuration_for_file_name(&test_file_path)? + .ok_or_else(|| { + anyhow!( + "{}", + util::lang_not_found_for_path(test_file_path.as_path(), loader_config) + ) + })?; + let highlight_config = language_config + .highlight_config(language, None)? 
+ .ok_or_else(|| anyhow!("No highlighting config found for {test_file_path:?}"))?; + match test_highlight( + loader, + highlighter, + highlight_config, + fs::read(&test_file_path)?.as_slice(), + ) { + Ok(assertion_count) => { + println!( + "✓ {} ({assertion_count} assertions)", + opt_color( + use_color, + Colour::Green, + test_file_name.to_string_lossy().as_ref() + ), + ); + } + Err(e) => { + println!( + "✗ {}", + opt_color( + use_color, + Colour::Red, + test_file_name.to_string_lossy().as_ref() + ) + ); + println!( + "{indent:indent_level$} {e}", + indent = "", + indent_level = indent_level * 2 + ); + failed = true; + } + } } } @@ -84,55 +142,53 @@ pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { } } pub fn iterate_assertions( - assertions: &Vec, - highlights: &Vec<(Point, Point, Highlight)>, - highlight_names: &Vec, + assertions: &[Assertion], + highlights: &[(Point, Point, Highlight)], + highlight_names: &[String], ) -> Result { // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); + let mut actual_highlights = Vec::new(); for Assertion { position, + negative, expected_capture_name: expected_highlight, } in assertions { let mut passed = false; actual_highlights.clear(); - 'highlight_loop: loop { - // The assertions are ordered by position, so skip past all of the highlights that - // end at or before this assertion's position. - if let Some(highlight) = highlights.get(i) { - if highlight.1 <= *position { - i += 1; - continue; - } - - // Iterate through all of the highlights that start at or before this assertion's, - // position, looking for one that matches the assertion. - let mut j = i; - while let (false, Some(highlight)) = (passed, highlights.get(j)) { - if highlight.0 > *position { - break 'highlight_loop; - } + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. + 'highlight_loop: while let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } - // If the highlight matches the assertion, this test passes. Otherwise, - // add this highlight to the list of actual highlights that span the - // assertion's position, in order to generate an error message in the event - // of a failure. - let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { - passed = true; - break 'highlight_loop; - } else { - actual_highlights.push(highlight_name); - } + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. + let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } - j += 1; + // If the highlight matches the assertion, or if the highlight doesn't + // match the assertion but it's negative, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. 
+ let highlight_name = &highlight_names[(highlight.2).0]; + if (*highlight_name == *expected_highlight) == *negative { + actual_highlights.push(highlight_name); + } else { + passed = true; + break 'highlight_loop; } - } else { - break; + + j += 1; } } @@ -160,70 +216,9 @@ pub fn test_highlight( let highlight_names = loader.highlight_names(); let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; let assertions = - parse_position_comments(highlighter.parser(), highlight_config.language, source)?; - - iterate_assertions(&assertions, &highlights, &highlight_names)?; + parse_position_comments(highlighter.parser(), &highlight_config.language, source)?; - // Iterate through all of the highlighting assertions, checking each one against the - // actual highlights. - let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); - for Assertion { - position, - expected_capture_name: expected_highlight, - } in &assertions - { - let mut passed = false; - actual_highlights.clear(); - - 'highlight_loop: loop { - // The assertions are ordered by position, so skip past all of the highlights that - // end at or before this assertion's position. - if let Some(highlight) = highlights.get(i) { - if highlight.1 <= *position { - i += 1; - continue; - } - - // Iterate through all of the highlights that start at or before this assertion's, - // position, looking for one that matches the assertion. - let mut j = i; - while let (false, Some(highlight)) = (passed, highlights.get(j)) { - if highlight.0 > *position { - break 'highlight_loop; - } - - // If the highlight matches the assertion, this test passes. Otherwise, - // add this highlight to the list of actual highlights that span the - // assertion's position, in order to generate an error message in the event - // of a failure. 
- let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { - passed = true; - break 'highlight_loop; - } else { - actual_highlights.push(highlight_name); - } - - j += 1; - } - } else { - break; - } - } - - if !passed { - return Err(Failure { - row: position.row, - column: position.column, - expected_highlight: expected_highlight.clone(), - actual_highlights: actual_highlights.into_iter().cloned().collect(), - } - .into()); - } - } - - Ok(assertions.len()) + iterate_assertions(&assertions, &highlights, &highlight_names) } pub fn get_highlight_positions( @@ -268,7 +263,7 @@ pub fn get_highlight_positions( } } if let Some(highlight) = highlight_stack.last() { - result.push((start_position, Point::new(row, column), *highlight)) + result.push((start_position, Point::new(row, column), *highlight)); } } } diff --git a/cli/src/test_tags.rs b/cli/src/test_tags.rs index 024d094..56e1065 100644 --- a/cli/src/test_tags.rs +++ b/cli/src/test_tags.rs @@ -1,12 +1,17 @@ -use crate::query_testing::{parse_position_comments, Assertion}; +use std::{fs, path::Path}; + use ansi_term::Colour; use anyhow::{anyhow, Result}; -use std::fs; -use std::path::Path; use tree_sitter::Point; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; use tree_sitter_tags::{TagsConfiguration, TagsContext}; +use super::{ + query_testing::{parse_position_comments, Assertion}, + test::opt_color, + util, +}; + #[derive(Debug)] pub struct Failure { row: usize, @@ -31,17 +36,21 @@ impl std::fmt::Display for Failure { if i > 0 { write!(f, ", ")?; } - write!(f, "'{}'", actual_tag)?; + write!(f, "'{actual_tag}'")?; } } Ok(()) } } -pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> { +pub fn test_tags( + loader: &Loader, + loader_config: &Config, + tags_context: &mut TagsContext, + directory: &Path, + use_color: bool, +) -> Result<()> { let mut failed = false; - let mut tags_context = TagsContext::new(); - println!("tags:"); for tag_test_file in fs::read_dir(directory)? { let tag_test_file = tag_test_file?; @@ -49,28 +58,40 @@ pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> { let test_file_name = tag_test_file.file_name(); let (language, language_config) = loader .language_configuration_for_file_name(&test_file_path)? - .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; + .ok_or_else(|| { + anyhow!( + "{}", + util::lang_not_found_for_path(test_file_path.as_path(), loader_config) + ) + })?; let tags_config = language_config .tags_config(language)? 
.ok_or_else(|| anyhow!("No tags config found for {:?}", test_file_path))?; match test_tag( - &mut tags_context, + tags_context, tags_config, fs::read(&test_file_path)?.as_slice(), ) { Ok(assertion_count) => { println!( - " ✓ {} ({} assertions)", - Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), - assertion_count + " ✓ {} ({assertion_count} assertions)", + opt_color( + use_color, + Colour::Green, + test_file_name.to_string_lossy().as_ref() + ), ); } Err(e) => { println!( " ✗ {}", - Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) + opt_color( + use_color, + Colour::Red, + test_file_name.to_string_lossy().as_ref() + ) ); - println!(" {}", e); + println!(" {e}"); failed = true; } } @@ -89,45 +110,45 @@ pub fn test_tag( source: &[u8], ) -> Result { let tags = get_tag_positions(tags_context, tags_config, source)?; - let assertions = parse_position_comments(tags_context.parser(), tags_config.language, source)?; + let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?; // Iterate through all of the assertions, checking against the actual tags. let mut i = 0; let mut actual_tags = Vec::<&String>::new(); for Assertion { position, + negative, expected_capture_name: expected_tag, } in &assertions { let mut passed = false; - 'tag_loop: loop { - if let Some(tag) = tags.get(i) { - if tag.1 <= *position { - i += 1; - continue; + 'tag_loop: while let Some(tag) = tags.get(i) { + if tag.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the tags that start at or before this assertion's + // position, looking for one that matches the assertion + let mut j = i; + while let (false, Some(tag)) = (passed, tags.get(j)) { + if tag.0 > *position { + break 'tag_loop; } - // Iterate through all of the tags that start at or before this assertion's - // position, looking for one that matches the assertion - let mut j = i; - while let (false, Some(tag)) = (passed, tags.get(j)) { - if tag.0 > *position { - break 'tag_loop; - } - - let tag_name = &tag.2; - if *tag_name == *expected_tag { - passed = true; - break 'tag_loop; - } else { - actual_tags.push(tag_name); - } - - j += 1; + let tag_name = &tag.2; + if (*tag_name == *expected_tag) == *negative { + actual_tags.push(tag_name); + } else { + passed = true; + break 'tag_loop; + } + + j += 1; + if tag == tags.last().unwrap() { + break 'tag_loop; } - } else { - break; } } @@ -150,15 +171,15 @@ pub fn get_tag_positions( tags_config: &TagsConfiguration, source: &[u8], ) -> Result> { - let (tags_iter, _has_error) = tags_context.generate_tags(&tags_config, &source, None)?; + let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?; let tag_positions = tags_iter - .filter_map(|t| t.ok()) + .filter_map(std::result::Result::ok) .map(|tag| { let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string(); let tag_name = if tag.is_definition { - format!("definition.{}", tag_postfix) + format!("definition.{tag_postfix}") } else { - format!("reference.{}", tag_postfix) + format!("reference.{tag_postfix}") }; (tag.span.start, tag.span.end, tag_name) }) diff --git a/cli/src/tests/async_context_test.rs b/cli/src/tests/async_context_test.rs new file mode 100644 index 0000000..cb2345c --- /dev/null +++ b/cli/src/tests/async_context_test.rs @@ -0,0 +1,284 @@ +use std::{ + future::Future, + pin::{pin, Pin}, + ptr, + task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker}, +}; + +use tree_sitter::Parser; + +use super::helpers::fixtures::get_language; + 
+#[test] +fn test_node_in_fut() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let fut_val_fn = || async { + // eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + yield_now().await; + + let fut_ref_fn = || async { + // eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val_fn().await; + let f2 = fut_ref_fn().await; + assert_eq!(f1, f2); + + let fut_val = async { + // eprintln!("fut_val: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + let fut_ref = async { + // eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val.await; + let f2 = fut_ref.await; + assert_eq!(f1, f2); + + f1 + }) + .join(); + // eprintln!("pended: {pended:?}"); + assert_eq!(ret, "comment"); + assert_eq!(pended, 5); +} + +#[test] +fn test_node_and_cursor_ref_in_fut() { + let ((), pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("c"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = async { + yield_now().await; + let _ = root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = async { + yield_now().await; + let _ = root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val.await; + fut_ref.await; + + cursor_ref.goto_first_child(); + }) + .join(); + assert_eq!(pended, 3); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() { + let ((), pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("javascript"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || async { + yield_now().await; + let _ = root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = || async move { + yield_now().await; + let _ = root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val().await; + fut_val().await; + fut_ref().await; + }) + .join(); + assert_eq!(pended, 4); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_inner_spawns() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || { + let tree = tree.clone(); + async move { + let root = tree.root_node(); + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + yield_now().await; + let _ = root.to_sexp(); + cursor_ref.goto_first_child(); + } + }; + + yield_now().await; + + let fut_ref = || { + let tree = tree.clone(); + async move { + let root = tree.root_node(); + let root_ref = &root; + let mut cursor = tree.walk(); + 
let cursor_ref = &mut cursor;
+                yield_now().await;
+                let _ = root_ref.to_sexp();
+                cursor_ref.goto_first_child();
+            }
+        };
+
+        let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap();
+        let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap();
+
+        cursor_ref.goto_first_child();
+
+        fut_val().await;
+        fut_val().await;
+        fut_ref().await;
+
+        cursor_ref.goto_first_child();
+
+        p1 + p2
+    })
+    .join();
+    assert_eq!(pended, 4);
+    assert_eq!(ret, 2);
+}
+
+fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
+where
+    T: Future + Send + 'static,
+    T::Output: Send + 'static,
+{
+    // No runtime, just noop waker
+
+    let waker = noop_waker();
+    let mut cx = task::Context::from_waker(&waker);
+
+    let mut pending = 0;
+    let mut future = pin!(future);
+    let ret = loop {
+        match future.as_mut().poll(&mut cx) {
+            Poll::Pending => pending += 1,
+            Poll::Ready(r) => {
+                // eprintln!("ready, pended: {pending}");
+                break r;
+            }
+        }
+    };
+    JoinHandle::new((ret, pending))
+}
+
+async fn yield_now() {
+    struct SimpleYieldNow {
+        yielded: bool,
+    }
+
+    impl Future for SimpleYieldNow {
+        type Output = ();
+
+        fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
+            cx.waker().wake_by_ref();
+            if self.yielded {
+                return Poll::Ready(());
+            }
+            self.yielded = true;
+            Poll::Pending
+        }
+    }
+
+    SimpleYieldNow { yielded: false }.await;
+}
+
+pub fn noop_waker() -> Waker {
+    const VTABLE: RawWakerVTable = RawWakerVTable::new(
+        // Cloning just returns a new no-op raw waker
+        |_| RAW,
+        // `wake` does nothing
+        |_| {},
+        // `wake_by_ref` does nothing
+        |_| {},
+        // Dropping does nothing as we don't allocate anything
+        |_| {},
+    );
+    const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
+    unsafe { Waker::from_raw(RAW) }
+}
+
+struct JoinHandle<T> {
+    data: Option<T>,
+}
+
+impl<T> JoinHandle<T> {
+    #[must_use]
+    const fn new(data: T) -> Self {
+        Self { data: Some(data) }
+    }
+
+    fn join(&mut self) -> T {
+        self.data.take().unwrap()
+    }
+}
+
+impl<T> Future for JoinHandle<T> {
+    type Output = std::result::Result<T, ()>;
+
+    fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
+        let data = self.get_mut().data.take().unwrap();
+        Poll::Ready(Ok(data))
+    }
+}
diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs
index b818b2c..5b1e78f 100644
--- a/cli/src/tests/corpus_test.rs
+++ b/cli/src/tests/corpus_test.rs
@@ -1,7 +1,12 @@
+use std::{collections::HashMap, env, fs};
+
+use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
+use tree_sitter_proc_macro::test_with_seed;
+
 use super::helpers::{
     allocations,
     edits::{get_random_edit, invert_edit},
-    fixtures::{fixtures_dir, get_language, get_test_language},
+    fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
     new_seed,
     random::Rand,
     scope_sequence::ScopeSequence,
@@ -14,85 +19,119 @@ use crate::{
     test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
     util,
 };
-use proc_macro::test_with_seed;
-use std::{env, fs};
-use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
 
 #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
 fn test_corpus_for_bash(seed: usize) {
-    test_language_corpus(seed, "bash");
+    test_language_corpus(
+        "bash",
+        seed,
+        Some(&[
+            // Fragile tests where edit customization changes
+            // lead to significant parse tree structure changes.
+ "bash - corpus - commands - Nested Heredocs", + "bash - corpus - commands - Quoted Heredocs", + "bash - corpus - commands - Heredocs with weird characters", + ]), + None, + ); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_c(seed: usize) { - test_language_corpus(seed, "c"); + test_language_corpus("c", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_cpp(seed: usize) { - test_language_corpus(seed, "cpp"); + test_language_corpus("cpp", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_embedded_template(seed: usize) { - test_language_corpus(seed, "embedded-template"); + test_language_corpus("embedded-template", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_go(seed: usize) { - test_language_corpus(seed, "go"); + test_language_corpus("go", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_html(seed: usize) { - test_language_corpus(seed, "html"); + test_language_corpus("html", seed, None, None); +} + +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_java(seed: usize) { + test_language_corpus("java", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_javascript(seed: usize) { - test_language_corpus(seed, "javascript"); + test_language_corpus("javascript", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_json(seed: usize) { - test_language_corpus(seed, "json"); + test_language_corpus("json", seed, None, None); } +#[ignore] #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_php(seed: usize) { - test_language_corpus(seed, "php"); + test_language_corpus("php", seed, None, Some("php")); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_python(seed: usize) { - test_language_corpus(seed, "python"); + test_language_corpus("python", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_ruby(seed: usize) { - test_language_corpus(seed, "ruby"); + test_language_corpus("ruby", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_rust(seed: usize) { - test_language_corpus(seed, "rust"); + test_language_corpus("rust", seed, None, None); } -fn test_language_corpus(start_seed: usize, language_name: &str) { +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_typescript(seed: usize) { + test_language_corpus("typescript", seed, None, Some("typescript")); +} + +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_tsx(seed: usize) { + test_language_corpus("typescript", seed, None, Some("tsx")); +} + +fn test_language_corpus( + language_name: &str, + start_seed: usize, + skipped: Option<&[&str]>, + subdir: Option<&str>, +) { + let subdir = subdir.unwrap_or_default(); + let grammars_dir = fixtures_dir().join("grammars"); let error_corpus_dir = fixtures_dir().join("error_corpus"); let template_corpus_dir = fixtures_dir().join("template_corpus"); - let mut corpus_dir = grammars_dir.join(language_name).join("corpus"); + let mut corpus_dir = grammars_dir.join(language_name).join(subdir).join("corpus"); if !corpus_dir.is_dir() { - corpus_dir = 
grammars_dir.join(language_name).join("test").join("corpus");
+        corpus_dir = grammars_dir
+            .join(language_name)
+            .join(subdir)
+            .join("test")
+            .join("corpus");
     }
 
-    let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
-    let template_corpus_file =
-        template_corpus_dir.join(&format!("{}_templates.txt", language_name));
+    let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt"));
+    let template_corpus_file = template_corpus_dir.join(format!("{language_name}_templates.txt"));
 
     let main_tests = parse_tests(&corpus_dir).unwrap();
-    let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
-    let template_tests = parse_tests(&template_corpus_file).unwrap_or(TestEntry::default());
+    let error_tests = parse_tests(&error_corpus_file).unwrap_or_default();
+    let template_tests = parse_tests(&template_corpus_file).unwrap_or_default();
     let mut tests = flatten_tests(main_tests);
     tests.extend(flatten_tests(error_tests));
     tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
@@ -100,35 +139,53 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
         t
     }));
 
-    let language = get_language(language_name);
+    let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::<HashMap<&str, usize>>());
+
+    let language_path = if subdir.is_empty() {
+        language_name.to_string()
+    } else {
+        format!("{language_name}/{subdir}")
+    };
+    let language = get_language(&language_path);
     let mut failure_count = 0;
 
     let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
+    let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
+
+    if log_seed {
+        println!("  start seed: {start_seed}");
+    }
 
     println!();
-    for test in tests {
-        println!("  {} example - {}", language_name, test.name);
+    for (test_index, test) in tests.iter().enumerate() {
+        let test_name = format!("{language_name} - {}", test.name);
+        if let Some(skipped) = skipped.as_mut() {
+            if let Some(counter) = skipped.get_mut(test_name.as_str()) {
+                println!("  {test_index}. {test_name} - SKIPPED");
+                *counter += 1;
+                continue;
+            }
+        }
+
+        println!("  {test_index}. {test_name}");
 
         let passed = allocations::record(|| {
             let mut log_session = None;
             let mut parser = get_parser(&mut log_session, "log.html");
-            parser.set_language(language).unwrap();
+            parser.set_language(&language).unwrap();
             set_included_ranges(&mut parser, &test.input, test.template_delimiters);
 
             let tree = parser.parse(&test.input, None).unwrap();
 
             let mut actual_output = tree.root_node().to_sexp();
             if !test.has_fields {
-                actual_output = strip_sexp_fields(actual_output);
+                actual_output = strip_sexp_fields(&actual_output);
             }
 
             if actual_output != test.output {
-                println!(
-                    "Incorrect initial parse for {} - {}",
-                    language_name, test.name,
-                );
+                println!("Incorrect initial parse for {test_name}");
                 print_diff_key();
-                print_diff(&actual_output, &test.output);
-                println!("");
+                print_diff(&actual_output, &test.output, true);
+                println!();
                 return false;
             }
@@ -141,7 +198,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
             }
 
             let mut parser = Parser::new();
-            parser.set_language(language).unwrap();
+            parser.set_language(&language).unwrap();
             let tree = parser.parse(&test.input, None).unwrap();
             drop(parser);
@@ -151,7 +208,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
                 let mut rand = Rand::new(seed);
                 let mut log_session = None;
                 let mut parser = get_parser(&mut log_session, "log.html");
-                parser.set_language(language).unwrap();
+                parser.set_language(&language).unwrap();
                 let mut tree = tree.clone();
                 let mut input = test.input.clone();
@@ -161,14 +218,23 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
 
                 // Perform a random series of edits and reparse.
                 let mut undo_stack = Vec::new();
-                for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
+                for _ in 0..=rand.unsigned(*EDIT_COUNT) {
                     let edit = get_random_edit(&mut rand, &input);
                     undo_stack.push(invert_edit(&input, &edit));
-                    perform_edit(&mut tree, &mut input, &edit);
+                    perform_edit(&mut tree, &mut input, &edit).unwrap();
                 }
 
                 if log_seed {
-                    println!("  seed: {}", seed);
+                    println!("    {test_index}.{trial:<2} seed: {seed}");
+                }
+
+                if dump_edits {
+                    fs::write(
+                        SCRATCH_BASE_DIR
+                            .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
+                        &input,
+                    )
+                    .unwrap();
                 }
 
                 if *LOG_GRAPH_ENABLED {
@@ -187,7 +253,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
 
                 // Undo all of the edits and re-parse again.
                 while let Some(edit) = undo_stack.pop() {
-                    perform_edit(&mut tree2, &mut input, &edit);
+                    perform_edit(&mut tree2, &mut input, &edit).unwrap();
                 }
                 if *LOG_GRAPH_ENABLED {
                     eprintln!("{}\n", String::from_utf8_lossy(&input));
@@ -199,17 +265,14 @@ fn test_language_corpus(start_seed: usize, language_name: &str) {
 
                 // Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp(); if !test.has_fields { - actual_output = strip_sexp_fields(actual_output); + actual_output = strip_sexp_fields(&actual_output); } if actual_output != test.output { - println!( - "Incorrect parse for {} - {} - seed {}", - language_name, test.name, seed - ); + println!("Incorrect parse for {test_name} - seed {seed}"); print_diff_key(); - print_diff(&actual_output, &test.output); - println!(""); + print_diff(&actual_output, &test.output, true); + println!(); return false; } @@ -230,8 +293,21 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { } } - if failure_count > 0 { - panic!("{} {} corpus tests failed", failure_count, language_name); + assert!( + failure_count == 0, + "{failure_count} {language_name} corpus tests failed" + ); + + if let Some(skipped) = skipped.as_mut() { + skipped.retain(|_, v| *v == 0); + + if !skipped.is_empty() { + println!("Non matchable skip definitions:"); + for k in skipped.keys() { + println!(" {k}"); + } + panic!("Non matchable skip definitions needs to be removed"); + } } } @@ -240,7 +316,7 @@ fn test_feature_corpus_files() { let test_grammars_dir = fixtures_dir().join("test_grammars"); let mut failure_count = 0; - for entry in fs::read_dir(&test_grammars_dir).unwrap() { + for entry in fs::read_dir(test_grammars_dir).unwrap() { let entry = entry.unwrap(); if !entry.metadata().unwrap().is_dir() { continue; @@ -260,7 +336,7 @@ fn test_feature_corpus_files() { grammar_path = test_path.join("grammar.json"); } let error_message_path = test_path.join("expected_error.txt"); - let grammar_json = generate::load_grammar_file(&grammar_path).unwrap(); + let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap(); let generate_result = generate::generate_parser_for_grammar(&grammar_json); if error_message_path.exists() { @@ -268,7 +344,7 @@ fn test_feature_corpus_files() { continue; } - eprintln!("test language: {:?}", language_name); + eprintln!("test language: {language_name:?}"); let expected_message = fs::read_to_string(&error_message_path) .unwrap() @@ -277,24 +353,17 @@ fn test_feature_corpus_files() { let actual_message = e.to_string().replace("\r\n", "\n"); if expected_message != actual_message { eprintln!( - "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n", - expected_message, actual_message + "Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n", ); failure_count += 1; } } else { - eprintln!( - "Expected error message but got none for test grammar '{}'", - language_name - ); + eprintln!("Expected error message but got none for test grammar '{language_name}'",); failure_count += 1; } } else { if let Err(e) = &generate_result { - eprintln!( - "Unexpected error for test grammar '{}':\n{}", - language_name, e - ); + eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",); failure_count += 1; continue; } @@ -306,7 +375,7 @@ fn test_feature_corpus_files() { let tests = flatten_tests(test); if !tests.is_empty() { - eprintln!("test language: {:?}", language_name); + eprintln!("test language: {language_name:?}"); } for test in tests { @@ -315,18 +384,18 @@ fn test_feature_corpus_files() { let passed = allocations::record(|| { let mut log_session = None; let mut parser = get_parser(&mut log_session, "log.html"); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&test.input, None).unwrap(); let mut actual_output = tree.root_node().to_sexp(); if !test.has_fields { 
-                actual_output = strip_sexp_fields(actual_output);
+                actual_output = strip_sexp_fields(&actual_output);
                 }
 
                 if actual_output == test.output {
                     true
                 } else {
                     print_diff_key();
-                    print_diff(&actual_output, &test.output);
-                    println!("");
+                    print_diff(&actual_output, &test.output, true);
+                    println!();
                     false
                 }
             });
@@ -338,13 +407,12 @@ fn test_feature_corpus_files() {
             }
         }
     }
-    if failure_count > 0 {
-        panic!("{} corpus tests failed", failure_count);
-    }
+
+    assert!(failure_count == 0, "{failure_count} corpus tests failed");
 }
 
-fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
-    fn check(node: Node, line_offsets: &Vec<usize>) {
+fn check_consistent_sizes(tree: &Tree, input: &[u8]) {
+    fn check(node: Node, line_offsets: &[usize]) {
         let start_byte = node.start_byte();
         let end_byte = node.end_byte();
         let start_point = node.start_position();
@@ -391,7 +459,7 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
     let mut line_offsets = vec![0];
     for (i, c) in input.iter().enumerate() {
-        if *c == '\n' as u8 {
+        if *c == b'\n' {
             line_offsets.push(i + 1);
         }
     }
@@ -399,7 +467,7 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
     check(tree.root_node(), &line_offsets);
 }
 
-fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Result<(), String> {
+fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> {
     let changed_ranges = old_tree.changed_ranges(new_tree).collect::<Vec<_>>();
     let old_scope_sequence = ScopeSequence::new(old_tree);
     let new_scope_sequence = ScopeSequence::new(new_tree);
@@ -415,13 +483,12 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re
     for range in &changed_ranges {
         if range.end_byte > byte_range.end || range.end_point > point_range.end {
             return Err(format!(
-                "changed range extends outside of the old and new trees {:?}",
-                range
+                "changed range extends outside of the old and new trees {range:?}",
             ));
         }
     }
 
-    old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
+    old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges)
 }
 
 fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
@@ -429,7 +496,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s
     let mut ranges = Vec::new();
     let mut ix = 0;
     while ix < input.len() {
-        let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
+        let Some(mut start_ix) = input[ix..]
+            .windows(2)
+            .position(|win| win == start.as_bytes())
+        else {
+            break;
+        };
         start_ix += ix + start.len();
         let end_ix = input[start_ix..]
             .windows(2)
@@ -469,13 +541,13 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
     if *LOG_ENABLED {
         parser.set_logger(Some(Box::new(|log_type, msg| {
             if log_type == LogType::Lex {
-                eprintln!("  {}", msg);
+                eprintln!("  {msg}");
             } else {
-                eprintln!("{}", msg);
+                eprintln!("{msg}");
             }
         })));
     } else if *LOG_GRAPH_ENABLED {
-        *session = Some(util::log_graphs(&mut parser, log_filename).unwrap());
+        *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap());
     }
 
     parser
@@ -497,6 +569,7 @@ fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
             input,
             output,
             has_fields,
+            ..
         } => {
             if !prefix.is_empty() {
                 name.insert_str(0, " - ");
diff --git a/cli/src/tests/detect_language.rs b/cli/src/tests/detect_language.rs
new file mode 100644
index 0000000..db313f5
--- /dev/null
+++ b/cli/src/tests/detect_language.rs
@@ -0,0 +1,134 @@
+use std::{fs, path::Path};
+
+use tree_sitter_loader::Loader;
+
+use crate::tests::helpers::fixtures::scratch_dir;
+
+#[test]
+fn detect_language_by_first_line_regex() {
+    let strace_dir = tree_sitter_dir(
+        r#"{
+  "name": "tree-sitter-strace",
+  "version": "0.0.1",
+  "tree-sitter": [
+    {
+      "scope": "source.strace",
+      "file-types": [
+        "strace"
+      ],
+      "first-line-regex": "[0-9:.]* *execve"
+    }
+  ]
+}
+"#,
+        "strace",
+    );
+
+    let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf());
+    let config = loader
+        .find_language_configurations_at_path(strace_dir.path(), false)
+        .unwrap();
+
+    // this is just to validate that we can read the package.json correctly
+    assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace");
+
+    let file_name = strace_dir.path().join("strace.log");
+    fs::write(&file_name, "execve\nworld").unwrap();
+    assert_eq!(
+        get_lang_scope(&loader, &file_name),
+        Some("source.strace".into())
+    );
+
+    let file_name = strace_dir.path().join("strace.log");
+    fs::write(&file_name, "447845 execve\nworld").unwrap();
+    assert_eq!(
+        get_lang_scope(&loader, &file_name),
+        Some("source.strace".into())
+    );
+
+    let file_name = strace_dir.path().join("strace.log");
+    fs::write(&file_name, "hello\nexecve").unwrap();
+    assert!(get_lang_scope(&loader, &file_name).is_none());
+
+    let file_name = strace_dir.path().join("strace.log");
+    fs::write(&file_name, "").unwrap();
+    assert!(get_lang_scope(&loader, &file_name).is_none());
+
+    let dummy_dir = tree_sitter_dir(
+        r#"{
+  "name": "tree-sitter-dummy",
+  "version": "0.0.1",
+  "tree-sitter": [
+    {
+      "scope": "source.dummy",
+      "file-types": [
+        "dummy"
+      ]
+    }
+  ]
+}
+"#,
+        "dummy",
+    );
+
+    // file-type takes precedence over first-line-regex
+    loader
+        .find_language_configurations_at_path(dummy_dir.path(), false)
+        .unwrap();
+    let file_name = dummy_dir.path().join("strace.dummy");
+    fs::write(&file_name, "execve").unwrap();
+    assert_eq!(
+        get_lang_scope(&loader, &file_name),
+        Some("source.dummy".into())
+    );
+}
+
+fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir {
+    let temp_dir = tempfile::tempdir().unwrap();
+    fs::write(temp_dir.path().join("package.json"), package_json).unwrap();
+    fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
+    fs::write(
+        temp_dir.path().join("src/grammar.json"),
+        format!(r#"{{"name":"{name}"}}"#),
+    )
+    .unwrap();
+    fs::write(
+        temp_dir.path().join("src/parser.c"),
+        format!(
+            r##"
+            #include "tree_sitter/parser.h"
+            #ifdef _WIN32
+            #define TS_PUBLIC __declspec(dllexport)
+            #else
+            #define TS_PUBLIC __attribute__((visibility("default")))
+            #endif
+            TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}}
+            "##
+        ),
+    )
+    .unwrap();
+    fs::write(
+        temp_dir.path().join("src/tree_sitter/parser.h"),
+        include_str!("../../../lib/src/parser.h"),
+    )
+    .unwrap();
+    temp_dir
+}
+
+// If we manage to get the language scope, it means we correctly detected the file-type
+fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
+    loader
+        .language_configuration_for_file_name(file_name)
+        .ok()
+        .and_then(|config| {
+            if let Some((_, config)) = config {
+                config.scope.clone()
+            } else if let Ok(Some((_, config))) =
+                loader.language_configuration_for_first_line_regex(file_name)
+            {
+                config.scope.clone()
+            } else {
+                None
+            }
+        })
+}
diff --git a/cli/src/tests/helpers/allocations.rs b/cli/src/tests/helpers/allocations.rs
index 9a51401..103cb09 100644
--- a/cli/src/tests/helpers/allocations.rs
+++ b/cli/src/tests/helpers/allocations.rs
@@ -2,7 +2,7 @@ use std::{
     collections::HashMap,
     os::raw::c_void,
     sync::{
-        atomic::{AtomicBool, AtomicU64, Ordering::SeqCst},
+        atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
         Mutex,
     },
 };
@@ -25,12 +25,12 @@ unsafe impl Sync for Allocation {}
 #[derive(Default)]
 struct AllocationRecorder {
     enabled: AtomicBool,
-    allocation_count: AtomicU64,
-    outstanding_allocations: Mutex<HashMap<*const c_void, u64>>,
+    allocation_count: AtomicUsize,
+    outstanding_allocations: Mutex<HashMap<*const c_void, usize>>,
 }
 
 thread_local! {
-    static RECORDER: AllocationRecorder = Default::default();
+    static RECORDER: AllocationRecorder = AllocationRecorder::default();
 }
 
 extern "C" {
@@ -60,12 +60,10 @@ pub fn record<T>(f: impl FnOnce() -> T) -> T {
             .map(|e| e.1)
             .collect::<Vec<_>>()
     });
-    if !outstanding_allocation_indices.is_empty() {
-        panic!(
-            "Leaked allocation indices: {:?}",
-            outstanding_allocation_indices
-        );
-    }
+    assert!(
+        outstanding_allocation_indices.is_empty(),
+        "Leaked allocation indices: {outstanding_allocation_indices:?}"
+    );
     value
 }
 
@@ -107,9 +105,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void
 }
 
 unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
-    record_dealloc(ptr);
     let result = realloc(ptr, size);
-    record_alloc(result);
+    if ptr.is_null() {
+        record_alloc(result);
+    } else if ptr != result {
+        record_dealloc(ptr);
+        record_alloc(result);
+    }
     result
 }
diff --git a/cli/src/tests/helpers/dirs.rs b/cli/src/tests/helpers/dirs.rs
index 4bf345d..4d1c498 100644
--- a/cli/src/tests/helpers/dirs.rs
+++ b/cli/src/tests/helpers/dirs.rs
@@ -1,11 +1,47 @@
 lazy_static! {
-    static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
-    static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
-    static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
-    static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
-    static ref SCRATCH_DIR: PathBuf = {
+    pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
+    pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
+    pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
+    pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
+    pub static ref SCRATCH_BASE_DIR: PathBuf = {
         let result = ROOT_DIR.join("target").join("scratch");
         fs::create_dir_all(&result).unwrap();
         result
     };
+    pub static ref WASM_DIR: PathBuf = ROOT_DIR.join("target").join("release");
+    pub static ref SCRATCH_DIR: PathBuf = {
+        // https://doc.rust-lang.org/reference/conditional-compilation.html
+        let vendor = if cfg!(target_vendor = "apple") {
+            "apple"
+        } else if cfg!(target_vendor = "fortanix") {
+            "fortanix"
+        } else if cfg!(target_vendor = "pc") {
+            "pc"
+        } else {
+            "unknown"
+        };
+        let env = if cfg!(target_env = "gnu") {
+            "gnu"
+        } else if cfg!(target_env = "msvc") {
+            "msvc"
+        } else if cfg!(target_env = "musl") {
+            "musl"
+        } else if cfg!(target_env = "sgx") {
+            "sgx"
+        } else {
+            "unknown"
+        };
+        let endian = if cfg!(target_endian = "little") {
+            "little"
+        } else if cfg!(target_endian = "big") {
+            "big"
+        } else {
+            "unknown"
+        };
+
+        let machine = format!("{}-{}-{vendor}-{env}-{endian}", std::env::consts::ARCH, std::env::consts::OS);
+        let result = SCRATCH_BASE_DIR.join(machine);
+        fs::create_dir_all(&result).unwrap();
+        result
+    };
 }
diff --git a/cli/src/tests/helpers/edits.rs b/cli/src/tests/helpers/edits.rs
index 4b07485..f6172bb 100644
--- a/cli/src/tests/helpers/edits.rs
+++ b/cli/src/tests/helpers/edits.rs
@@ -1,16 +1,17 @@
+use std::{ops::Range, str};
+
 use super::random::Rand;
 use crate::parse::Edit;
-use std::ops::Range;
-use std::str;
 
 #[derive(Debug)]
 pub struct ReadRecorder<'a> {
-    content: &'a Vec<u8>,
+    content: &'a [u8],
     indices_read: Vec<usize>,
 }
 
 impl<'a> ReadRecorder<'a> {
-    pub fn new(content: &'a Vec<u8>) -> Self {
+    #[must_use]
+    pub const fn new(content: &'a [u8]) -> Self {
         Self {
             content,
             indices_read: Vec::new(),
@@ -30,8 +31,8 @@ impl<'a> ReadRecorder<'a> {
 
     pub fn strings_read(&self) -> Vec<&'a str> {
         let mut result = Vec::new();
-        let mut last_range: Option<Range<usize>> = None;
-        for index in self.indices_read.iter() {
+        let mut last_range = Option::<Range<usize>>::None;
+        for index in &self.indices_read {
             if let Some(ref mut range) = &mut last_range {
                 if range.end == *index {
                     range.end += 1;
@@ -44,13 +45,13 @@ impl<'a> ReadRecorder<'a> {
             }
         }
         if let Some(range) = last_range {
-            result.push(str::from_utf8(&self.content[range.clone()]).unwrap());
+            result.push(str::from_utf8(&self.content[range]).unwrap());
         }
         result
     }
 }
 
-pub fn invert_edit(input: &Vec<u8>, edit: &Edit) -> Edit {
+pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
     let position = edit.position;
     let removed_content = &input[position..(position + edit.deleted_length)];
     Edit {
@@ -60,7 +61,7 @@ pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit {
     }
 }
 
-pub fn get_random_edit(rand: &mut Rand, input: &Vec<u8>) -> Edit {
+pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit {
     let choice = rand.unsigned(10);
     if choice < 2 {
         // Insert text at end
diff --git a/cli/src/tests/helpers/fixtures.rs b/cli/src/tests/helpers/fixtures.rs index 7d04b24..786c18f 100644 --- a/cli/src/tests/helpers/fixtures.rs +++ b/cli/src/tests/helpers/fixtures.rs @@ -1,29 +1,46 @@ +use std::{ + env, fs, + path::{Path, PathBuf}, +}; + +use anyhow::Context; use lazy_static::lazy_static; -use std::fs; -use std::path::{Path, PathBuf}; use tree_sitter::Language; use tree_sitter_highlight::HighlightConfiguration; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{CompileConfig, Loader}; use tree_sitter_tags::TagsConfiguration; +use crate::generate::ALLOC_HEADER; + include!("./dirs.rs"); lazy_static! { - static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); + static ref TEST_LOADER: Loader = { + let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); + if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() { + loader.use_debug_build(true); + } + loader + }; } -pub fn test_loader<'a>() -> &'a Loader { - &*TEST_LOADER +pub fn test_loader() -> &'static Loader { + &TEST_LOADER } -pub fn fixtures_dir<'a>() -> &'static Path { +pub fn fixtures_dir() -> &'static Path { &FIXTURES_DIR } +pub fn scratch_dir() -> &'static Path { + &SCRATCH_DIR +} + pub fn get_language(name: &str) -> Language { - TEST_LOADER - .load_language_at_path(&GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) - .unwrap() + let src_dir = GRAMMARS_DIR.join(name).join("src"); + let mut config = CompileConfig::new(&src_dir, None, None); + config.header_paths.push(&HEADER_DIR); + TEST_LOADER.load_language_at_path(config).unwrap() } pub fn get_language_queries_path(language_name: &str) -> PathBuf { @@ -38,20 +55,20 @@ pub fn get_highlight_config( let language = get_language(language_name); let queries_path = get_language_queries_path(language_name); let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); - let injections_query = if let Some(injection_query_filename) = injection_query_filename { - fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() - } else { - String::new() - }; - let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let injections_query = + injection_query_filename.map_or_else(String::new, |injection_query_filename| { + fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() + }); + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); let mut result = HighlightConfiguration::new( language, + language_name, &highlights_query, &injections_query, &locals_query, ) .unwrap(); - result.configure(&highlight_names); + result.configure(highlight_names); result } @@ -59,33 +76,62 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration { let language = get_language(language_name); let queries_path = get_language_queries_path(language_name); let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap(); - let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); TagsConfiguration::new(language, &tags_query, &locals_query).unwrap() } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { - let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); - if !fs::read_to_string(&parser_c_path) - .map(|content| content == parser_code) - .unwrap_or(false) - { - fs::write(&parser_c_path, parser_code).unwrap(); + let 
src_dir = scratch_dir().join("src").join(name); + fs::create_dir_all(&src_dir).unwrap(); + + let parser_path = src_dir.join("parser.c"); + if !fs::read_to_string(&parser_path).map_or(false, |content| content == parser_code) { + fs::write(&parser_path, parser_code).unwrap(); } - let scanner_path = path.and_then(|p| { - let result = p.join("scanner.c"); - if result.exists() { - Some(result) + + let scanner_path = if let Some(path) = path { + let scanner_path = path.join("scanner.c"); + if scanner_path.exists() { + let scanner_code = fs::read_to_string(&scanner_path).unwrap(); + let scanner_copy_path = src_dir.join("scanner.c"); + if !fs::read_to_string(&scanner_copy_path) + .map_or(false, |content| content == scanner_code) + { + fs::write(&scanner_copy_path, scanner_code).unwrap(); + } + Some(scanner_copy_path) } else { None } + } else { + None + }; + + let header_path = src_dir.join("tree_sitter"); + fs::create_dir_all(&header_path).unwrap(); + + [ + ("alloc.h", ALLOC_HEADER), + ("array.h", tree_sitter::ARRAY_HEADER), + ("parser.h", tree_sitter::PARSER_HEADER), + ] + .iter() + .for_each(|(file, content)| { + let file = header_path.join(file); + fs::write(&file, content) + .with_context(|| format!("Failed to write {:?}", file.file_name().unwrap())) + .unwrap(); }); - TEST_LOADER - .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) - .unwrap() -} -pub fn get_test_grammar(name: &str) -> (String, Option) { - let dir = fixtures_dir().join("test_grammars").join(name); - let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap(); - (grammar, Some(dir)) + let paths_to_check = if let Some(scanner_path) = &scanner_path { + vec![parser_path, scanner_path.clone()] + } else { + vec![parser_path] + }; + + let mut config = CompileConfig::new(&src_dir, Some(&paths_to_check), None); + config.header_paths = vec![&HEADER_DIR]; + config.name = name.to_string(); + + TEST_LOADER.load_language_at_path_with_name(config).unwrap() } diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index 54df880..229c798 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -5,9 +5,10 @@ pub(super) mod query_helpers; pub(super) mod random; pub(super) mod scope_sequence; +use std::env; + use lazy_static::lazy_static; use rand::Rng; -use std::env; lazy_static! 
{
     pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
@@ -26,7 +27,7 @@ fn int_env_var(name: &'static str) -> Option<usize> {
     env::var(name).ok().and_then(|e| e.parse().ok())
 }
 
-pub(crate) fn new_seed() -> usize {
+pub fn new_seed() -> usize {
     int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
         let mut rng = rand::thread_rng();
         rng.gen::<usize>()
diff --git a/cli/src/tests/helpers/query_helpers.rs b/cli/src/tests/helpers/query_helpers.rs
index 78ae559..9e7a6f6 100644
--- a/cli/src/tests/helpers/query_helpers.rs
+++ b/cli/src/tests/helpers/query_helpers.rs
@@ -1,6 +1,9 @@
-use rand::prelude::Rng;
 use std::{cmp::Ordering, fmt::Write, ops::Range};
-use tree_sitter::{Node, Point, Tree, TreeCursor};
+
+use rand::prelude::Rng;
+use tree_sitter::{
+    Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
+};
 
 #[derive(Debug)]
 pub struct Pattern {
@@ -17,7 +20,7 @@ pub struct Match<'a, 'tree> {
     pub last_node: Option<Node<'tree>>,
 }
 
-const CAPTURE_NAMES: &'static [&'static str] = &[
+const CAPTURE_NAMES: &[&str] = &[
     "one", "two", "three", "four", "five", "six", "seven", "eight",
 ];
 
@@ -55,12 +58,11 @@ impl Pattern {
             children: roots,
         };
 
-        if pattern.children.len() == 1 {
-            pattern = pattern.children.pop().unwrap();
-        }
+        if pattern.children.len() == 1 ||
         // In a parenthesized list of sibling patterns, the first
         // sibling can't be an anonymous `_` wildcard.
-        else if pattern.children[0].kind == Some("_") && !pattern.children[0].named {
+            (pattern.children[0].kind == Some("_") && !pattern.children[0].named)
+        {
             pattern = pattern.children.pop().unwrap();
         }
         // In a parenthesized list of sibling patterns, the first
@@ -121,22 +123,16 @@ impl Pattern {
         }
     }
 
-    pub fn to_string(&self) -> String {
-        let mut result = String::new();
-        self.write_to_string(&mut result, 0);
-        result
-    }
-
     fn write_to_string(&self, string: &mut String, indent: usize) {
         if let Some(field) = self.field {
-            write!(string, "{}: ", field).unwrap();
+            write!(string, "{field}: ").unwrap();
         }
 
         if self.named {
             string.push('(');
             let mut has_contents = false;
             if let Some(kind) = &self.kind {
-                write!(string, "{}", kind).unwrap();
+                write!(string, "{kind}").unwrap();
                 has_contents = true;
             }
             for child in &self.children {
@@ -152,11 +148,11 @@ impl Pattern {
         } else if self.kind == Some("_") {
             string.push('_');
         } else {
-            write!(string, "\"{}\"", self.kind.unwrap().replace("\"", "\\\"")).unwrap();
+            write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap();
         }
 
         if let Some(capture) = &self.capture {
-            write!(string, " @{}", capture).unwrap();
+            write!(string, " @{capture}").unwrap();
         }
     }
 
@@ -212,11 +208,10 @@ impl Pattern {
         // Create a match for the current node.
         let mat = Match {
-            captures: if let Some(name) = &self.capture {
-                vec![(name.as_str(), node)]
-            } else {
-                Vec::new()
-            },
+            captures: self
+                .capture
+                .as_ref()
+                .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]),
             last_node: Some(node),
         };
 
@@ -244,7 +239,7 @@ impl Pattern {
                 new_match_states.push((*pattern_index + 1, combined_match));
             } else {
                 let mut existing = false;
-                for existing_match in finished_matches.iter_mut() {
+                for existing_match in &mut finished_matches {
                     if existing_match.captures == combined_match.captures {
                         if child_pattern.capture.is_some() {
                             existing_match.last_node = combined_match.last_node;
@@ -269,6 +264,14 @@ impl Pattern {
     }
 }
 
+impl std::fmt::Display for Pattern {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut result = String::new();
+        self.write_to_string(&mut result, 0);
+        write!(f, "{result}")
+    }
+}
+
 impl<'a, 'tree> PartialOrd for Match<'a, 'tree> {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
@@ -304,3 +307,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering {
     let b = b.byte_range();
     a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
 }
+
+pub fn assert_query_matches(
+    language: &Language,
+    query: &Query,
+    source: &str,
+    expected: &[(usize, Vec<(&str, &str)>)],
+) {
+    let mut parser = Parser::new();
+    parser.set_language(language).unwrap();
+    let tree = parser.parse(source, None).unwrap();
+    let mut cursor = QueryCursor::new();
+    let matches = cursor.matches(query, tree.root_node(), source.as_bytes());
+    pretty_assertions::assert_eq!(collect_matches(matches, query, source), expected);
+    pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false);
+}
+
+pub fn collect_matches<'a>(
+    matches: impl Iterator<Item = QueryMatch<'a, 'a>>,
+    query: &'a Query,
+    source: &'a str,
+) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
+    matches
+        .map(|m| {
+            (
+                m.pattern_index,
+                format_captures(m.captures.iter().copied(), query, source),
+            )
+        })
+        .collect()
+}
+
+pub fn collect_captures<'a>(
+    captures: impl Iterator<Item = (QueryMatch<'a, 'a>, usize)>,
+    query: &'a Query,
+    source: &'a str,
+) -> Vec<(&'a str, &'a str)> {
+    format_captures(captures.map(|(m, i)| m.captures[i]), query, source)
+}
+
+fn format_captures<'a>(
+    captures: impl Iterator<Item = QueryCapture<'a>>,
+    query: &'a Query,
+    source: &'a str,
+) -> Vec<(&'a str, &'a str)> {
+    captures
+        .map(|capture| {
+            (
+                query.capture_names()[capture.index as usize],
+                capture.node.utf8_text(source.as_bytes()).unwrap(),
+            )
+        })
+        .collect()
+}
diff --git a/cli/src/tests/helpers/random.rs b/cli/src/tests/helpers/random.rs
index 77c347d..a4069b3 100644
--- a/cli/src/tests/helpers/random.rs
+++ b/cli/src/tests/helpers/random.rs
@@ -11,11 +11,11 @@ pub struct Rand(StdRng);
 
 impl Rand {
     pub fn new(seed: usize) -> Self {
-        Rand(StdRng::seed_from_u64(seed as u64))
+        Self(StdRng::seed_from_u64(seed as u64))
     }
 
     pub fn unsigned(&mut self, max: usize) -> usize {
-        self.0.gen_range(0..max + 1)
+        self.0.gen_range(0..=max)
     }
 
     pub fn words(&mut self, max_count: usize) -> Vec<u8> {
@@ -24,9 +24,9 @@ impl Rand {
         for i in 0..word_count {
             if i > 0 {
                 if self.unsigned(5) == 0 {
-                    result.push('\n' as u8);
+                    result.push(b'\n');
                 } else {
-                    result.push(' ' as u8);
+                    result.push(b' ');
                 }
             }
             if self.unsigned(3) == 0 {
@@ -34,7 +34,7 @@ impl Rand {
                 result.push(OPERATORS[index] as u8);
             } else {
                 for _ in 0..self.unsigned(8) {
-                    result.push(self.0.sample(Alphanumeric) as u8);
+                    result.push(self.0.sample(Alphanumeric));
                }
             }
         }
diff --git a/cli/src/tests/helpers/scope_sequence.rs b/cli/src/tests/helpers/scope_sequence.rs
index 4521833..436455d 100644
--- a/cli/src/tests/helpers/scope_sequence.rs
+++ b/cli/src/tests/helpers/scope_sequence.rs
@@ -7,7 +7,7 @@ type ScopeStack = Vec<&'static str>;
 
 impl ScopeSequence {
     pub fn new(tree: &Tree) -> Self {
-        let mut result = ScopeSequence(Vec::new());
+        let mut result = Self(Vec::new());
         let mut scope_stack = Vec::new();
         let mut cursor = tree.walk();
@@ -40,9 +40,9 @@ impl ScopeSequence {
 
     pub fn check_changes(
         &self,
-        other: &ScopeSequence,
-        text: &Vec<u8>,
-        known_changed_ranges: &Vec<Range>,
+        other: &Self,
+        text: &[u8],
+        known_changed_ranges: &[Range],
     ) -> Result<(), String> {
         let mut position = Point { row: 0, column: 0 };
         for i in 0..(self.0.len().max(other.0.len())) {
@@ -54,7 +54,7 @@ impl ScopeSequence {
                 .find(|range| range.start_point <= position && position < range.end_point);
             if containing_range.is_none() {
                 let line = &text[(i - position.column)..]
-                    .split(|c| *c == '\n' as u8)
+                    .split(|c| *c == b'\n')
                     .next()
                     .unwrap();
                 return Err(format!(
@@ -78,7 +78,7 @@ impl ScopeSequence {
             }
         }
 
-        if text[i] == '\n' as u8 {
+        if text[i] == b'\n' {
             position.row += 1;
             position.column = 0;
         } else {
diff --git a/cli/src/tests/highlight_test.rs b/cli/src/tests/highlight_test.rs
index e0b356d..ab9dfcb 100644
--- a/cli/src/tests/highlight_test.rs
+++ b/cli/src/tests/highlight_test.rs
@@ -1,13 +1,18 @@
-use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
+use std::{
+    ffi::CString,
+    fs,
+    os::raw::c_char,
+    ptr, slice, str,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
 use lazy_static::lazy_static;
-use std::ffi::CString;
-use std::os::raw::c_char;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::{fs, ptr, slice, str};
 use tree_sitter_highlight::{
     c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
 };
 
+use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
+
 lazy_static! {
     static ref JS_HIGHLIGHT: HighlightConfiguration =
         get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES);
@@ -24,6 +29,7 @@ lazy_static! {
         get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
     static ref HIGHLIGHT_NAMES: Vec<String> = [
         "attribute",
+        "boolean",
         "carriage-return",
         "comment",
         "constant",
@@ -48,12 +54,12 @@ lazy_static! {
         "variable",
     ]
     .iter()
-    .cloned()
+    .copied()
     .map(String::from)
     .collect();
     static ref HTML_ATTRS: Vec<String> = HIGHLIGHT_NAMES
         .iter()
-        .map(|s| format!("class={}", s))
+        .map(|s| format!("class={s}"))
         .collect();
 }
 
@@ -61,7 +67,7 @@ lazy_static! {
 fn test_highlighting_javascript() {
     let source = "const a = function(b) { return b + c; }";
     assert_eq!(
-        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
+        &to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
         &[vec![
             ("const", vec!["keyword"]),
             (" ", vec![]),
             ("a", vec!["variable"]),
             (" ", vec![]),
             ("=", vec!["operator"]),
             (" ", vec![]),
             ("function", vec!["keyword"]),
             ("(", vec!["punctuation.bracket"]),
-            ("b", vec!["variable.parameter"]),
+            ("b", vec!["variable"]),
             (")", vec!["punctuation.bracket"]),
             (" ", vec![]),
             ("{", vec!["punctuation.bracket"]),
             (" ", vec![]),
             ("return", vec!["keyword"]),
             (" ", vec![]),
-            ("b", vec!["variable.parameter"]),
+            ("b", vec!["variable"]),
             (" ", vec![]),
             ("+", vec!["operator"]),
             (" ", vec![]),
@@ -92,7 +98,7 @@ fn test_highlighting_javascript() {
 
 #[test]
 fn test_highlighting_injected_html_in_javascript() {
-    let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + let source = ["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -156,7 +162,7 @@ fn test_highlighting_injected_javascript_in_html_mini() { #[test] fn test_highlighting_injected_javascript_in_html() { - let source = vec![ + let source = [ "", " "].join("\n"); + let source = ["
<% foo() %>
"].join("\n"); assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), @@ -376,7 +383,7 @@ fn test_highlighting_ejs_with_html_and_javascript() { fn test_highlighting_javascript_with_jsdoc() { // Regression test: the middle comment has no highlights. This should not prevent // later injections from highlighting properly. - let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -404,7 +411,7 @@ fn test_highlighting_javascript_with_jsdoc() { #[test] fn test_highlighting_with_content_children_included() { - let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); + let source = ["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), @@ -482,7 +489,7 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let highlights = vec![ + let highlights = [ "class=tag\0", "class=function\0", "class=string\0", @@ -490,74 +497,86 @@ fn test_highlighting_via_c_api() { ]; let highlight_names = highlights .iter() - .map(|h| h["class=".len()..].as_ptr() as *const c_char) + .map(|h| h["class=".len()..].as_ptr().cast::()) .collect::>(); let highlight_attrs = highlights .iter() - .map(|h| h.as_bytes().as_ptr() as *const c_char) + .map(|h| h.as_bytes().as_ptr().cast::()) .collect::>(); - let highlighter = c::ts_highlighter_new( - &highlight_names[0] as *const *const c_char, - &highlight_attrs[0] as *const *const c_char, - highlights.len() as u32, - ); + let highlighter = unsafe { + c::ts_highlighter_new( + std::ptr::addr_of!(highlight_names[0]), + std::ptr::addr_of!(highlight_attrs[0]), + highlights.len() as u32, + ) + }; let source_code = c_string(""); let js_scope = c_string("source.js"); let js_injection_regex = c_string("^javascript"); let language = get_language("javascript"); + let lang_name = c_string("javascript"); let queries = get_language_queries_path("javascript"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - js_scope.as_ptr(), - js_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - locals_query.as_ptr() as *const c_char, - highlights_query.len() as u32, - injections_query.len() as u32, - locals_query.len() as u32, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr().cast::(), + injections_query.as_ptr().cast::(), + locals_query.as_ptr().cast::(), + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + } let html_scope = c_string("text.html.basic"); let html_injection_regex = c_string("^html"); let language = get_language("html"); + let lang_name = c_string("html"); let queries = get_language_queries_path("html"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - html_scope.as_ptr(), - html_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as 
*const c_char, - injections_query.as_ptr() as *const c_char, - ptr::null(), - highlights_query.len() as u32, - injections_query.len() as u32, - 0, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr().cast::(), + injections_query.as_ptr().cast::(), + ptr::null(), + highlights_query.len() as u32, + injections_query.len() as u32, + 0, + ); + } let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_highlight( - highlighter, - html_scope.as_ptr(), - source_code.as_ptr(), - source_code.as_bytes().len() as u32, - buffer, - ptr::null_mut(), - ); + unsafe { + c::ts_highlighter_highlight( + highlighter, + html_scope.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + ptr::null_mut(), + ); + } - let output_bytes = c::ts_highlight_buffer_content(buffer); - let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer); - let output_len = c::ts_highlight_buffer_len(buffer); - let output_line_count = c::ts_highlight_buffer_line_count(buffer); + let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) }; + let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) }; + let output_len = unsafe { c::ts_highlight_buffer_len(buffer) }; + let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) }; let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; let output_line_offsets = @@ -568,8 +587,7 @@ fn test_highlighting_via_c_api() { let line_start = output_line_offsets[i] as usize; let line_end = output_line_offsets .get(i + 1) - .map(|x| *x as usize) - .unwrap_or(output_bytes.len()); + .map_or(output_bytes.len(), |x| *x as usize); lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap()); } @@ -583,8 +601,69 @@ fn test_highlighting_via_c_api() { ] ); - c::ts_highlighter_delete(highlighter); - c::ts_highlight_buffer_delete(buffer); + unsafe { + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); + } +} + +#[test] +fn test_highlighting_with_all_captures_applied() { + let source = "fn main(a: u32, b: u32) -> { let c = a + b; }"; + let language = get_language("rust"); + let highlights_query = indoc::indoc! 
{" + [ + \"fn\" + \"let\" + ] @keyword + (identifier) @variable + (function_item name: (identifier) @function) + (parameter pattern: (identifier) @variable.parameter) + (primitive_type) @type.builtin + \"=\" @operator + [ \"->\" \":\" \";\" ] @punctuation.delimiter + [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket + "}; + let mut rust_highlight_reverse = + HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap(); + rust_highlight_reverse.configure(&HIGHLIGHT_NAMES); + + assert_eq!( + &to_token_vector(source, &rust_highlight_reverse).unwrap(), + &[[ + ("fn", vec!["keyword"]), + (" ", vec![]), + ("main", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("a", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (", ", vec![]), + ("b", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("->", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("let", vec!["keyword"]), + (" ", vec![]), + ("c", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("a", vec!["variable"]), + (" + ", vec![]), + ("b", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]) + ]], + ); } #[test] @@ -641,9 +720,13 @@ fn to_html<'a>( renderer .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) .unwrap(); - Ok(renderer.lines().map(|s| s.to_string()).collect()) + Ok(renderer + .lines() + .map(std::string::ToString::to_string) + .collect()) } +#[allow(clippy::type_complexity)] fn to_token_vector<'a>( src: &'a str, language_config: &'a HighlightConfiguration, @@ -667,20 +750,19 @@ fn to_token_vector<'a>( } HighlightEvent::Source { start, end } => { let s = str::from_utf8(&src[start..end]).unwrap(); - for (i, l) in s.split("\n").enumerate() { + for (i, l) in s.split('\n').enumerate() { let l = l.trim_end_matches('\r'); if i > 0 { - lines.push(line); - line = Vec::new(); + lines.push(std::mem::take(&mut line)); } - if l.len() > 0 { + if !l.is_empty() { line.push((l, highlights.clone())); } } } } } - if line.len() > 0 { + if !line.is_empty() { lines.push(line); } Ok(lines) diff --git a/cli/src/tests/language_test.rs b/cli/src/tests/language_test.rs new file mode 100644 index 0000000..681b93f --- /dev/null +++ b/cli/src/tests/language_test.rs @@ -0,0 +1,65 @@ +use tree_sitter::Parser; + +use super::helpers::fixtures::get_language; + +#[test] +fn test_lookahead_iterator() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + assert_eq!( + next_state, + language.next_state(cursor.node().parse_state(), cursor.node().grammar_id()) + ); + assert!((next_state as usize) < language.parse_state_count()); + assert!(cursor.goto_next_sibling()); // type_identifier + assert_eq!(next_state, cursor.node().parse_state()); + assert_eq!(cursor.node().grammar_name(), "identifier"); + assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id()); + + let expected_symbols = ["//", "/*", "identifier", "line_comment", 
"block_comment"]; + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + assert_eq!(*lookahead.language(), language); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset_state(next_state); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset(&language, next_state); + assert!(lookahead + .map(|s| language.node_kind_for_id(s).unwrap()) + .eq(expected_symbols)); +} + +#[test] +fn test_lookahead_iterator_modifiable_only_by_mut() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + let _ = lookahead.next(); + + let mut names = lookahead.iter_names(); + let _ = names.next(); +} diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 1b80445..596bc8d 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,11 +1,19 @@ +mod async_context_test; mod corpus_test; +mod detect_language; mod helpers; mod highlight_test; +mod language_test; mod node_test; +mod parser_hang_test; mod parser_test; mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; mod test_tags_test; +mod text_provider_test; mod tree_test; + +#[cfg(feature = "wasm")] +mod wasm_language_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 6d5ed61..898ccd8 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -1,12 +1,16 @@ -use super::helpers::edits::get_random_edit; -use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; -use super::helpers::random::Rand; -use crate::generate::generate_parser_for_grammar; -use crate::parse::perform_edit; -use std::fs; use tree_sitter::{Node, Parser, Point, Tree}; -const JSON_EXAMPLE: &'static str = r#" +use super::helpers::{ + edits::get_random_edit, + fixtures::{fixtures_dir, get_language, get_test_language}, + random::Rand, +}; +use crate::{ + generate::{generate_parser_for_grammar, load_grammar_file}, + parse::perform_edit, +}; + +const JSON_EXAMPLE: &str = r#" [ 123, @@ -17,7 +21,7 @@ const JSON_EXAMPLE: &'static str = r#" ] "#; -const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &'static str = r#"{ +const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &str = r#"{ "name": "aliases_and_extras", "extras": [ @@ -60,8 +64,8 @@ fn test_node_child() { assert_eq!(array_node.kind(), "array"); assert_eq!(array_node.named_child_count(), 3); - assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find("[").unwrap()); - assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find("]").unwrap() + 1); + assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find('[').unwrap()); + assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find(']').unwrap() + 1); assert_eq!(array_node.start_position(), Point::new(2, 0)); assert_eq!(array_node.end_position(), Point::new(8, 1)); assert_eq!(array_node.child_count(), 7); @@ -82,13 +86,13 @@ fn test_node_child() { assert_eq!(object_node.kind(), "object"); assert_eq!(right_bracket_node.kind(), "]"); - assert_eq!(left_bracket_node.is_named(), false); - assert_eq!(number_node.is_named(), true); - assert_eq!(comma_node1.is_named(), false); - assert_eq!(false_node.is_named(), true); - assert_eq!(comma_node2.is_named(), false); - 
assert_eq!(object_node.is_named(), true); - assert_eq!(right_bracket_node.is_named(), false); + assert!(!left_bracket_node.is_named()); + assert!(number_node.is_named()); + assert!(!comma_node1.is_named()); + assert!(false_node.is_named()); + assert!(!comma_node2.is_named()); + assert!(object_node.is_named()); + assert!(!right_bracket_node.is_named()); assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); assert_eq!( @@ -106,7 +110,7 @@ fn test_node_child() { assert_eq!(false_node.start_position(), Point::new(4, 2)); assert_eq!(false_node.end_position(), Point::new(4, 7)); - assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); assert_eq!(object_node.start_position(), Point::new(5, 2)); assert_eq!(object_node.end_position(), Point::new(7, 3)); @@ -119,9 +123,9 @@ fn test_node_child() { assert_eq!(pair_node.kind(), "pair"); assert_eq!(right_brace_node.kind(), "}"); - assert_eq!(left_brace_node.is_named(), false); - assert_eq!(pair_node.is_named(), true); - assert_eq!(right_brace_node.is_named(), false); + assert!(!left_brace_node.is_named()); + assert!(pair_node.is_named()); + assert!(!right_brace_node.is_named()); assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap()); assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); @@ -137,9 +141,9 @@ fn test_node_child() { assert_eq!(colon_node.kind(), ":"); assert_eq!(null_node.kind(), "null"); - assert_eq!(string_node.is_named(), true); - assert_eq!(colon_node.is_named(), false); - assert_eq!(null_node.is_named(), true); + assert!(string_node.is_named()); + assert!(!colon_node.is_named()); + assert!(null_node.is_named()); assert_eq!( string_node.start_byte(), @@ -165,6 +169,22 @@ fn test_node_child() { assert_eq!(object_node.parent().unwrap(), array_node); assert_eq!(array_node.parent().unwrap(), tree.root_node()); assert_eq!(tree.root_node().parent(), None); + + assert_eq!( + tree.root_node() + .child_containing_descendant(null_node) + .unwrap(), + array_node + ); + assert_eq!( + array_node.child_containing_descendant(null_node).unwrap(), + object_node + ); + assert_eq!( + object_node.child_containing_descendant(null_node).unwrap(), + pair_node + ); + assert_eq!(pair_node.child_containing_descendant(null_node), None); } #[test] @@ -202,7 +222,7 @@ fn test_node_children() { #[test] fn test_node_children_by_field_name() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); let source = " if one: a() @@ -230,7 +250,7 @@ fn test_node_children_by_field_name() { #[test] fn test_node_parent_of_child_by_field_name() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap(); let call_node = tree .root_node() @@ -248,16 +268,44 @@ fn test_node_parent_of_child_by_field_name() { ); } +#[test] +fn test_parent_of_zero_width_node() { + let code = "def dupa(foo):"; + + let mut parser = Parser::new(); + parser.set_language(&get_language("python")).unwrap(); + + let tree = parser.parse(code, None).unwrap(); + let root = tree.root_node(); + let function_definition = root.child(0).unwrap(); + let block = function_definition.child(4).unwrap(); + let block_parent = block.parent().unwrap(); + + assert_eq!(block.to_string(), "(block)"); + assert_eq!(block_parent.kind(), 
"function_definition"); + assert_eq!(block_parent.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))"); + + assert_eq!( + root.child_containing_descendant(block).unwrap(), + function_definition + ); + assert_eq!(function_definition.child_containing_descendant(block), None); +} + #[test] fn test_node_field_name_for_child() { let mut parser = Parser::new(); - parser.set_language(get_language("c")).unwrap(); - let tree = parser.parse("x + y;", None).unwrap(); + parser.set_language(&get_language("c")).unwrap(); + let tree = parser + .parse("int w = x + /* y is special! */ y;", None) + .unwrap(); let translation_unit_node = tree.root_node(); - let binary_expression_node = translation_unit_node - .named_child(0) + let declaration_node = translation_unit_node.named_child(0).unwrap(); + + let binary_expression_node = declaration_node + .child_by_field_name("declarator") .unwrap() - .named_child(0) + .child_by_field_name("value") .unwrap(); assert_eq!(binary_expression_node.field_name_for_child(0), Some("left")); @@ -265,18 +313,20 @@ fn test_node_field_name_for_child() { binary_expression_node.field_name_for_child(1), Some("operator") ); + // The comment should not have a field name, as it's just an extra + assert_eq!(binary_expression_node.field_name_for_child(2), None); assert_eq!( - binary_expression_node.field_name_for_child(2), + binary_expression_node.field_name_for_child(3), Some("right") ); // Negative test - Not a valid child index - assert_eq!(binary_expression_node.field_name_for_child(3), None); + assert_eq!(binary_expression_node.field_name_for_child(4), None); } #[test] fn test_node_child_by_field_name_with_extra_hidden_children() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); // In the Python grammar, some fields are applied to `suite` nodes, // which consist of an invisible `indent` token followed by a block. @@ -319,7 +369,7 @@ fn test_node_named_child() { assert_eq!(false_node.end_position(), Point::new(4, 7)); assert_eq!(object_node.kind(), "object"); - assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); assert_eq!(object_node.start_position(), Point::new(5, 2)); assert_eq!(object_node.end_position(), Point::new(7, 3)); @@ -362,6 +412,22 @@ fn test_node_named_child() { assert_eq!(object_node.parent().unwrap(), array_node); assert_eq!(array_node.parent().unwrap(), tree.root_node()); assert_eq!(tree.root_node().parent(), None); + + assert_eq!( + tree.root_node() + .child_containing_descendant(null_node) + .unwrap(), + array_node + ); + assert_eq!( + array_node.child_containing_descendant(null_node).unwrap(), + object_node + ); + assert_eq!( + object_node.child_containing_descendant(null_node).unwrap(), + pair_node + ); + assert_eq!(pair_node.child_containing_descendant(null_node), None); } #[test] @@ -371,7 +437,7 @@ fn test_node_named_child_with_aliases_and_extras() { let mut parser = Parser::new(); parser - .set_language(get_test_language(&parser_name, &parser_code, None)) + .set_language(&get_test_language(&parser_name, &parser_code, None)) .unwrap(); let tree = parser.parse("b ... b ... 
c", None).unwrap(); @@ -385,13 +451,55 @@ fn test_node_named_child_with_aliases_and_extras() { assert_eq!(root.named_child(4).unwrap().kind(), "C"); } +#[test] +fn test_node_descendant_count() { + let tree = parse_json_example(); + let value_node = tree.root_node(); + let all_nodes = get_all_nodes(&tree); + + assert_eq!(value_node.descendant_count(), all_nodes.len()); + + let mut cursor = value_node.walk(); + for (i, node) in all_nodes.iter().enumerate() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "index {i}"); + } + + for (i, node) in all_nodes.iter().enumerate().rev() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "rev index {i}"); + } +} + +#[test] +fn test_descendant_count_single_node_tree() { + let mut parser = Parser::new(); + parser + .set_language(&get_language("embedded-template")) + .unwrap(); + let tree = parser.parse("hello", None).unwrap(); + + let nodes = get_all_nodes(&tree); + assert_eq!(nodes.len(), 2); + assert_eq!(tree.root_node().descendant_count(), 2); + + let mut cursor = tree.root_node().walk(); + + cursor.goto_descendant(0); + assert_eq!(cursor.depth(), 0); + assert_eq!(cursor.node(), nodes[0]); + cursor.goto_descendant(1); + assert_eq!(cursor.depth(), 1); + assert_eq!(cursor.node(), nodes[1]); +} + #[test] fn test_node_descendant_for_range() { let tree = parse_json_example(); - let array_node = tree.root_node().child(0).unwrap(); + let array_node = tree.root_node(); // Leaf node exactly matches the given bounds - byte query - let colon_index = JSON_EXAMPLE.find(":").unwrap(); + let colon_index = JSON_EXAMPLE.find(':').unwrap(); let colon_node = array_node .descendant_for_byte_range(colon_index, colon_index + 1) .unwrap(); @@ -412,7 +520,7 @@ fn test_node_descendant_for_range() { assert_eq!(colon_node.end_position(), Point::new(6, 8)); // The given point is between two adjacent leaf nodes - byte query - let colon_index = JSON_EXAMPLE.find(":").unwrap(); + let colon_index = JSON_EXAMPLE.find(':').unwrap(); let colon_node = array_node .descendant_for_byte_range(colon_index, colon_index) .unwrap(); @@ -506,10 +614,10 @@ fn test_node_edit() { for _ in 0..10 { let mut nodes_before = get_all_nodes(&tree); - let edit = get_random_edit(&mut rand, &mut code); + let edit = get_random_edit(&mut rand, &code); let mut tree2 = tree.clone(); - let edit = perform_edit(&mut tree2, &mut code, &edit); - for node in nodes_before.iter_mut() { + let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); + for node in &mut nodes_before { node.edit(&edit); } @@ -532,7 +640,7 @@ fn test_node_edit() { #[test] fn test_root_node_with_offset() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(" if (a) b", None).unwrap(); let node = tree.root_node_with_offset(6, Point::new(2, 2)); @@ -560,7 +668,7 @@ fn test_root_node_with_offset() { #[test] fn test_node_is_extra() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("foo(/* hi */);", None).unwrap(); let root_node = tree.root_node(); @@ -575,7 +683,7 @@ fn test_node_is_extra() { #[test] fn test_node_sexp() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("if (a) b", None).unwrap(); let root_node = tree.root_node(); let 
if_node = root_node.descendant_for_byte_range(0, 0).unwrap(); @@ -664,7 +772,7 @@ fn test_node_field_names() { let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser .parse("child-0 child-1 child-2 child-3 child-4", None) @@ -734,7 +842,7 @@ fn test_node_field_calls_in_language_without_fields() { let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse("b c d", None).unwrap(); @@ -744,26 +852,26 @@ fn test_node_field_calls_in_language_without_fields() { let mut cursor = root_node.walk(); assert_eq!(cursor.field_name(), None); - assert_eq!(cursor.goto_first_child(), true); + assert!(cursor.goto_first_child()); assert_eq!(cursor.field_name(), None); } #[test] fn test_node_is_named_but_aliased_as_anonymous() { - let (parser_name, parser_code) = generate_parser_for_grammar( - &fs::read_to_string( - &fixtures_dir() - .join("test_grammars") - .join("named_rule_aliased_as_anonymous") - .join("grammar.json"), - ) - .unwrap(), + let grammar_json = load_grammar_file( + &fixtures_dir() + .join("test_grammars") + .join("named_rule_aliased_as_anonymous") + .join("grammar.js"), + None, ) .unwrap(); + let (parser_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); + let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse("B C B", None).unwrap(); @@ -782,13 +890,14 @@ fn test_node_is_named_but_aliased_as_anonymous() { #[test] fn test_node_numeric_symbols_respect_simple_aliases() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); // Example 1: - // Python argument lists can contain "splat" arguments, which are not allowed within - // other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These - // `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric - // `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`. + // Python argument lists can contain "splat" arguments, which are not allowed + // within other expressions. This includes `parenthesized_list_splat` nodes + // like `(*b)`. These `parenthesized_list_splat` nodes are aliased as + // `parenthesized_expression`. Their numeric `symbol`, aka `kind_id` should + // match that of a normal `parenthesized_expression`. let tree = parser.parse("(a((*b)))", None).unwrap(); let root = tree.root_node(); assert_eq!( @@ -810,10 +919,10 @@ fn test_node_numeric_symbols_respect_simple_aliases() { assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id()); // Example 2: - // Ruby handles the unary (negative) and binary (minus) `-` operators using two different - // tokens. One or more of these is an external token that's aliased as `-`. Their numeric - // kind ids should match. - parser.set_language(get_language("ruby")).unwrap(); + // Ruby handles the unary (negative) and binary (minus) `-` operators using two + // different tokens. One or more of these is an external token that's + // aliased as `-`. Their numeric kind ids should match. 
+    parser.set_language(&get_language("ruby")).unwrap();
     let tree = parser.parse("-a - b", None).unwrap();
     let root = tree.root_node();
     assert_eq!(
@@ -841,22 +950,22 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
     let mut visited_children = false;
     let mut cursor = tree.walk();
     loop {
-        result.push(cursor.node());
-        if !visited_children && cursor.goto_first_child() {
-            continue;
+        if !visited_children {
+            result.push(cursor.node());
+            if !cursor.goto_first_child() {
+                visited_children = true;
+            }
         } else if cursor.goto_next_sibling() {
             visited_children = false;
-        } else if cursor.goto_parent() {
-            visited_children = true;
-        } else {
+        } else if !cursor.goto_parent() {
             break;
         }
     }
-    return result;
+    result
 }
 
 fn parse_json_example() -> Tree {
     let mut parser = Parser::new();
-    parser.set_language(get_language("json")).unwrap();
+    parser.set_language(&get_language("json")).unwrap();
     parser.parse(JSON_EXAMPLE, None).unwrap()
 }
diff --git a/cli/src/tests/parser_hang_test.rs b/cli/src/tests/parser_hang_test.rs
new file mode 100644
index 0000000..e195a88
--- /dev/null
+++ b/cli/src/tests/parser_hang_test.rs
@@ -0,0 +1,105 @@
+// For some reason, `Command::spawn` doesn't work in CI env for many exotic arches.
+#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
+
+use std::{
+    env::VarError,
+    process::{Command, Stdio},
+};
+
+use tree_sitter::Parser;
+
+use crate::{
+    generate::{generate_parser_for_grammar, load_grammar_file},
+    tests::helpers::fixtures::{fixtures_dir, get_test_language},
+};
+
+// The `sanitizing` cfg is required so that these tests are not run under specific
+// sanitizers, because they don't work well with subprocesses _(it's an assumption)_.
+//
+// Below are two alternative examples of how to disable tests for some arches
+// if excluding the whole mod from compilation wouldn't work well.
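+//
+// How this test works: the test binary re-spawns itself as a child process with
+// the `CARGO_HANG_TEST` env var set; the child runs a parse that is expected to
+// hang, while the parent waits briefly, asserts that the child has not exited,
+// and then kills it.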
+//
+// XXX: Also, it may make sense to keep such tests ignored by default, to avoid
+// surprises, and enable them on CI by passing an extra option explicitly:
+//
+// > cargo test -- --include-ignored
+//
+// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
+// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
+//
+#[test]
+fn test_grammar_that_should_hang_and_not_segfault() {
+    let parent_sleep_millis = 1000;
+    let test_name = "test_grammar_that_should_hang_and_not_segfault";
+    let test_var = "CARGO_HANG_TEST";
+
+    eprintln!("  {test_name}");
+
+    let tests_exec_path = std::env::args()
+        .next()
+        .expect("Failed to get tests executable path");
+
+    match std::env::var(test_var) {
+        Ok(v) if v == test_name => {
+            eprintln!("    child process id {}", std::process::id());
+            hang_test();
+        }
+
+        Err(VarError::NotPresent) => {
+            eprintln!("    parent process id {}", std::process::id());
+            let mut command = Command::new(tests_exec_path);
+            command.arg(test_name).env(test_var, test_name);
+
+            if std::env::args().any(|x| x == "--nocapture") {
+                command.arg("--nocapture");
+            } else {
+                command.stdout(Stdio::null()).stderr(Stdio::null());
+            }
+
+            match command.spawn() {
+                Ok(mut child) => {
+                    std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
+                    match child.try_wait() {
+                        Ok(Some(status)) if status.success() => {
+                            panic!("Child didn't hang and exited successfully")
+                        }
+                        Ok(Some(status)) => panic!(
+                            "Child didn't hang and exited with status code: {:?}",
+                            status.code()
+                        ),
+                        _ => (),
+                    }
+                    if let Err(e) = child.kill() {
+                        eprintln!(
+                            "Failed to kill hang test's process id: {}, error: {e}",
+                            child.id()
+                        );
+                    }
+                }
+                Err(e) => panic!("{e}"),
+            }
+        }
+
+        Err(e) => panic!("Env var error: {e}"),
+
+        _ => unreachable!(),
+    }
+}
+
+fn hang_test() {
+    let test_grammar_dir = fixtures_dir()
+        .join("test_grammars")
+        .join("get_col_should_hang_not_crash");
+
+    let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
+    let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
+
+    let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
+
+    let mut parser = Parser::new();
+    parser.set_language(&language).unwrap();
+
+    let code_that_should_hang = "\nHello";
+
+    parser.parse(code_that_should_hang, None).unwrap();
+}
diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs
index 78c6cda..9e5f92c 100644
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@@ -1,24 +1,26 @@
+use std::{
+    sync::atomic::{AtomicUsize, Ordering},
+    thread, time,
+};
+
+use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
+use tree_sitter_proc_macro::retry;
+
 use super::helpers::{
     allocations,
-    edits::invert_edit,
-    edits::ReadRecorder,
-    fixtures::{get_language, get_test_grammar, get_test_language},
+    edits::{invert_edit, ReadRecorder},
+    fixtures::{get_language, get_test_language},
 };
 use crate::{
-    generate::generate_parser_for_grammar,
+    generate::{generate_parser_for_grammar, load_grammar_file},
     parse::{perform_edit, Edit},
+    tests::helpers::fixtures::fixtures_dir,
 };
-use proc_macro::retry;
-use std::{
-    sync::atomic::{AtomicUsize, Ordering},
-    thread, time,
-};
-use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
 
 #[test]
 fn test_parsing_simple_string() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
     let tree = parser
         .parse(
@@ -49,7 +51,7 @@
 #[test]
 fn test_parsing_with_logging() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
     let mut messages = Vec::new();
     parser.set_logger(Some(Box::new(|log_type, message| {
@@ -90,21 +92,20 @@ fn test_parsing_with_debug_graph_enabled() {
     let has_zero_indexed_row = |s: &str| s.contains("position: 0,");
 
     let mut parser = Parser::new();
-    parser.set_language(get_language("javascript")).unwrap();
+    parser.set_language(&get_language("javascript")).unwrap();
     let mut debug_graph_file = tempfile::tempfile().unwrap();
     parser.print_dot_graphs(&debug_graph_file);
     parser.parse("const zero = 0", None).unwrap();
-    debug_graph_file.seek(std::io::SeekFrom::Start(0)).unwrap();
+    debug_graph_file.rewind().unwrap();
 
     let log_reader = BufReader::new(debug_graph_file)
         .lines()
         .map(|l| l.expect("Failed to read line from graph log"));
 
     for line in log_reader {
         assert!(
             !has_zero_indexed_row(&line),
-            "Graph log output includes zero-indexed row: {}",
-            line
+            "Graph log output includes zero-indexed row: {line}",
         );
     }
 }
@@ -112,7 +113,7 @@
 #[test]
 fn test_parsing_with_custom_utf8_input() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
     let lines = &["pub fn foo() {", " 1", "}"];
 
@@ -125,7 +126,7 @@
             if column < lines[row].as_bytes().len() {
                 &lines[row].as_bytes()[column..]
             } else {
-                "\n".as_bytes()
+                b"\n"
             }
         } else {
             &[]
@@ -148,19 +149,19 @@
         )
     );
     assert_eq!(root.kind(), "source_file");
-    assert_eq!(root.has_error(), false);
+    assert!(!root.has_error());
     assert_eq!(root.child(0).unwrap().kind(), "function_item");
 }
 
 #[test]
 fn test_parsing_with_custom_utf16_input() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
-    let lines: Vec<Vec<u16>> = ["pub fn foo() {", " 1", "}"]
+    let lines = ["pub fn foo() {", " 1", "}"]
         .iter()
-        .map(|s| s.encode_utf16().collect())
-        .collect();
+        .map(|s| s.encode_utf16().collect::<Vec<u16>>())
+        .collect::<Vec<_>>();
 
     let tree = parser
         .parse_utf16_with(
@@ -187,14 +188,14 @@
         "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
     );
     assert_eq!(root.kind(), "source_file");
-    assert_eq!(root.has_error(), false);
+    assert!(!root.has_error());
     assert_eq!(root.child(0).unwrap().kind(), "function_item");
 }
 
 #[test]
 fn test_parsing_with_callback_returning_owned_strings() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
     let text = b"pub fn foo() { 1 }";
 
@@ -215,7 +216,7 @@
 #[test]
 fn test_parsing_text_with_byte_order_mark() {
     let mut parser = Parser::new();
-    parser.set_language(get_language("rust")).unwrap();
+    parser.set_language(&get_language("rust")).unwrap();
 
     // Parse UTF16 text with a BOM
     let tree = parser
@@ -274,15 +275,18 @@
 #[test]
 fn test_parsing_invalid_chars_at_eof() {
     let mut parser = Parser::new();
-
parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); let tree = parser.parse(b"\xdf", None).unwrap(); - assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))"); + assert_eq!( + tree.root_node().to_sexp(), + "(document (ERROR (UNEXPECTED INVALID)))" + ); } #[test] fn test_parsing_unexpected_null_characters_within_source() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(b"var \0 something;", None).unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -293,7 +297,7 @@ fn test_parsing_unexpected_null_characters_within_source() { #[test] fn test_parsing_ends_when_input_callback_returns_empty() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut i = 0; let source = b"abcdefghijklmnoqrs"; let tree = parser @@ -317,7 +321,7 @@ fn test_parsing_ends_when_input_callback_returns_empty() { #[test] fn test_parsing_after_editing_beginning_of_code() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut code = b"123 + 456 * (10 + x);".to_vec(); let mut tree = parser.parse(&code, None).unwrap(); @@ -339,7 +343,8 @@ fn test_parsing_after_editing_beginning_of_code() { deleted_length: 0, inserted_text: b" || 5".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -364,7 +369,7 @@ fn test_parsing_after_editing_beginning_of_code() { #[test] fn test_parsing_after_editing_end_of_code() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut code = b"x * (100 + abc);".to_vec(); let mut tree = parser.parse(&code, None).unwrap(); @@ -386,7 +391,8 @@ fn test_parsing_after_editing_end_of_code() { deleted_length: 0, inserted_text: b".d".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -411,7 +417,7 @@ fn test_parsing_after_editing_end_of_code() { #[test] fn test_parsing_empty_file_with_reused_tree() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse("", None); parser.parse("", tree.as_ref()); @@ -422,16 +428,15 @@ fn test_parsing_empty_file_with_reused_tree() { #[test] fn test_parsing_after_editing_tree_that_depends_on_column_values() { - let (grammar, path) = get_test_grammar("uses_current_column"); - let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap(); + let dir = fixtures_dir() + .join("test_grammars") + .join("uses_current_column"); + let grammar_json = load_grammar_file(&dir.join("grammar.js"), None).unwrap(); + let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap(); let mut parser = Parser::new(); parser - .set_language(get_test_language( - &grammar_name, - &parser_code, - path.as_ref().map(AsRef::as_ref), - )) + .set_language(&get_test_language(&grammar_name, &parser_code, Some(&dir))) .unwrap(); let mut code = b" @@ -461,7 +466,8 @@ h + i deleted_length: 0, inserted_text: b"1234".to_vec(), }, - ); + ) + .unwrap(); assert_eq!( code, @@ -500,13 +506,13 @@ h + i #[test] fn 
test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); let mut source = b"a = b, 'c, d'".to_vec(); let tree = parser.parse(&source, None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string string_content: (string_content))))))" + "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))" ); // Delete a suffix of the source code, starting in the middle of the string @@ -525,12 +531,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let undo = invert_edit(&source, &edit); let mut tree2 = tree.clone(); - perform_edit(&mut tree2, &mut source, &edit); + perform_edit(&mut tree2, &mut source, &edit).unwrap(); tree2 = parser.parse(&source, Some(&tree2)).unwrap(); assert!(tree2.root_node().has_error()); let mut tree3 = tree2.clone(); - perform_edit(&mut tree3, &mut source, &undo); + perform_edit(&mut tree3, &mut source, &undo).unwrap(); tree3 = parser.parse(&source, Some(&tree3)).unwrap(); assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); } @@ -544,7 +550,7 @@ fn test_parsing_on_multiple_threads() { let this_file_source = include_str!("parser_test.rs"); let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse(this_file_source, None).unwrap(); let mut parse_threads = Vec::new(); @@ -572,7 +578,7 @@ fn test_parsing_on_multiple_threads() { // Reparse using the old tree as a starting point. let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); parser.parse(&prepended_source, Some(&tree_clone)).unwrap() })); } @@ -593,7 +599,7 @@ fn test_parsing_cancelled_by_another_thread() { let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0)); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) }; // Long input - parsing succeeds @@ -642,7 +648,7 @@ fn test_parsing_cancelled_by_another_thread() { #[retry(10)] fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); // Parse an infinitely-long array, but pause after 1ms of processing. parser.set_timeout_micros(1000); @@ -681,14 +687,10 @@ fn test_parsing_with_a_timeout() { parser.set_timeout_micros(0); let tree = parser .parse_with( - &mut |offset, _| { - if offset > 5000 { - "".as_bytes() - } else if offset == 5000 { - "]".as_bytes() - } else { - ",0".as_bytes() - } + &mut |offset, _| match offset { + 5001.. 
=> "".as_bytes(), + 5000 => "]".as_bytes(), + _ => ",0".as_bytes(), }, None, ) @@ -697,9 +699,10 @@ fn test_parsing_with_a_timeout() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -752,10 +755,11 @@ fn test_parsing_with_a_timeout_and_a_reset() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_implicit_reset() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -766,7 +770,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() { // Changing the parser's language implicitly resets, discarding // the previous partial parse. - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); parser.set_timeout_micros(0); let tree = parser.parse( "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", @@ -785,10 +789,11 @@ fn test_parsing_with_a_timeout_and_implicit_reset() { } #[test] +#[retry(10)] fn test_parsing_with_timeout_and_no_completion() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -808,15 +813,25 @@ fn test_parsing_with_one_included_range() { let source_code = "hi"; let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let html_tree = parser.parse(source_code, None).unwrap(); let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); assert_eq!(script_content_node.kind(), "raw_text"); + assert_eq!( + parser.included_ranges(), + &[Range { + start_byte: 0, + end_byte: u32::MAX as usize, + start_point: Point::new(0, 0), + end_point: Point::new(u32::MAX as usize, u32::MAX as usize), + }] + ); parser .set_included_ranges(&[script_content_node.range()]) .unwrap(); - parser.set_language(get_language("javascript")).unwrap(); + assert_eq!(parser.included_ranges(), &[script_content_node.range()]); + parser.set_language(&get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); assert_eq!( @@ -824,7 +839,7 @@ fn test_parsing_with_one_included_range() { concat!( "(program (expression_statement (call_expression ", "function: (member_expression object: (identifier) property: (property_identifier)) ", - "arguments: (arguments (string)))))", + "arguments: (arguments (string (string_fragment))))))", ) ); assert_eq!( @@ -839,23 +854,23 @@ fn test_parsing_with_multiple_included_ranges() { let source_code = "html `
Hello, ${name.toUpperCase()}, it's ${now()}.
`"; let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); let template_string_node = js_tree .root_node() .descendant_for_byte_range( - source_code.find("
").unwrap(), - source_code.find("Hello").unwrap(), + source_code.find("`<").unwrap(), + source_code.find(">`").unwrap(), ) .unwrap(); assert_eq!(template_string_node.kind(), "template_string"); let open_quote_node = template_string_node.child(0).unwrap(); - let interpolation_node1 = template_string_node.child(1).unwrap(); - let interpolation_node2 = template_string_node.child(2).unwrap(); - let close_quote_node = template_string_node.child(3).unwrap(); + let interpolation_node1 = template_string_node.child(2).unwrap(); + let interpolation_node2 = template_string_node.child(4).unwrap(); + let close_quote_node = template_string_node.child(6).unwrap(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let html_ranges = &[ Range { start_byte: open_quote_node.end_byte(), @@ -882,7 +897,7 @@ fn test_parsing_with_multiple_included_ranges() { assert_eq!( html_tree.root_node().to_sexp(), concat!( - "(fragment (element", + "(document (element", " (start_tag (tag_name))", " (text)", " (element (start_tag (tag_name)) (end_tag (tag_name)))", @@ -934,7 +949,7 @@ fn test_parsing_with_included_range_containing_mismatched_positions() { let source_code = "
test
{_ignore_this_part_}"; let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let end_byte = source_code.find("{_ignore_this_part_").unwrap(); @@ -961,7 +976,7 @@ fn test_parsing_with_included_range_containing_mismatched_positions() { assert_eq!( html_tree.root_node().to_sexp(), - "(fragment (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" + "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" ); } @@ -1009,13 +1024,17 @@ fn test_parsing_error_in_invalid_included_ranges() { #[test] fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let source_code = ""; - let utf16_source_code: Vec = source_code.as_bytes().iter().map(|c| *c as u16).collect(); + let utf16_source_code = source_code + .as_bytes() + .iter() + .map(|c| u16::from(*c)) + .collect::>(); let start_byte = 2 * source_code.find("a.").unwrap(); let end_byte = 2 * source_code.find("").unwrap(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[Range { start_byte, @@ -1037,7 +1056,7 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { let range2_end_byte = range2_start_byte + " d() ".len(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[ Range { @@ -1081,7 +1100,7 @@ fn test_parsing_with_a_newly_excluded_range() { // Parse HTML including the template directive, which will cause an error let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let mut first_tree = parser .parse_with(&mut chunked_input(&source_code, 3), None) .unwrap(); @@ -1126,7 +1145,7 @@ fn test_parsing_with_a_newly_excluded_range() { assert_eq!( tree.root_node().to_sexp(), concat!( - "(fragment (text) (element", + "(document (text) (element", " (start_tag (tag_name))", " (element (start_tag (tag_name)) (end_tag (tag_name)))", " (end_tag (tag_name))))" @@ -1168,12 +1187,12 @@ fn test_parsing_with_a_newly_included_range() { // Parse only the first code directive as JavaScript let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[simple_range(range1_start, range1_end)]) .unwrap(); let tree = parser - .parse_with(&mut chunked_input(&source_code, 3), None) + .parse_with(&mut chunked_input(source_code, 3), None) .unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -1192,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() { ]) .unwrap(); let tree2 = parser - .parse_with(&mut chunked_input(&source_code, 3), Some(&tree)) + .parse_with(&mut chunked_input(source_code, 3), Some(&tree)) .unwrap(); assert_eq!( tree2.root_node().to_sexp(), @@ -1216,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() { simple_range(range3_start, range3_end), ]) .unwrap(); - let tree3 = parser.parse(&source_code, Some(&tree)).unwrap(); + let tree3 = parser.parse(source_code, Some(&tree)).unwrap(); assert_eq!( tree3.root_node().to_sexp(), concat!( @@ -1260,7 +1279,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { let mut parser = Parser::new(); parser - .set_language(get_test_language(&parser_name, &parser_code, None)) + 
.set_language(&get_test_language(&parser_name, &parser_code, None)) .unwrap(); // There's a missing `a` token at the beginning of the code. It must be inserted @@ -1293,7 +1312,119 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.child(3).unwrap().start_byte(), 4); } -fn simple_range(start: usize, end: usize) -> Range { +#[test] +fn test_grammars_that_can_hang_on_eof() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_single_null_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_next_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x01]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_range_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x7F]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); +} + +#[test] +fn test_parse_stack_recursive_merge_error_cost_calculation_bug() { + let source_code = r#" +fn main() { + if n == 1 { + } else if n == 2 { + } else { + } +} + +let y = if x == 5 { 10 } else { 15 }; + +if foo && bar {} + +if foo && bar || baz {} +"#; + + let mut parser = Parser::new(); + parser.set_language(&get_language("rust")).unwrap(); + + let mut tree = parser.parse(source_code, None).unwrap(); + + let edit = Edit { + position: 60, + deleted_length: 63, + inserted_text: Vec::new(), + }; + let mut input = source_code.as_bytes().to_vec(); + perform_edit(&mut tree, &mut input, &edit).unwrap(); + + parser.parse(&input, Some(&tree)).unwrap(); +} + +const fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start, end_byte: end, diff --git a/cli/src/tests/pathological_test.rs b/cli/src/tests/pathological_test.rs index ec10884..6e008a6 100644 --- a/cli/src/tests/pathological_test.rs +++ b/cli/src/tests/pathological_test.rs @@ -1,6 +1,7 @@ -use super::helpers::{allocations, fixtures::get_language}; use tree_sitter::Parser; +use super::helpers::{allocations, fixtures::get_language}; + #[test] fn test_pathological_example_1() { let language = "cpp"; @@ -8,7 +9,7 @@ fn test_pathological_example_1() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language(language)).unwrap(); + parser.set_language(&get_language(language)).unwrap(); parser.parse(source, None).unwrap(); }); } diff --git 
a/cli/src/tests/proc_macro/Cargo.toml b/cli/src/tests/proc_macro/Cargo.toml index e6900d1..ade4d61 100644 --- a/cli/src/tests/proc_macro/Cargo.toml +++ b/cli/src/tests/proc_macro/Cargo.toml @@ -1,15 +1,15 @@ [package] -name = "proc_macro" -version = "0.1.0" -edition = "2021" -publish = false +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +edition.workspace = true rust-version.workspace = true +publish = false [lib] proc-macro = true [dependencies] -proc-macro2 = "1" -quote = "1" +proc-macro2 = "1.0.78" +quote = "1.0.35" rand = "0.8.5" -syn = { version = "1", features = ["full"] } +syn = { version = "2.0.52", features = ["full"] } diff --git a/cli/src/tests/proc_macro/src/lib.rs b/cli/src/tests/proc_macro/src/lib.rs index d831a75..a63006c 100644 --- a/cli/src/tests/proc_macro/src/lib.rs +++ b/cli/src/tests/proc_macro/src/lib.rs @@ -10,8 +10,8 @@ use syn::{ pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream { let count = parse_macro_input!(args as LitInt); let input = parse_macro_input!(input as ItemFn); - let attrs = input.attrs.clone(); - let name = input.sig.ident.clone(); + let attrs = &input.attrs; + let name = &input.sig.ident; TokenStream::from(quote! { #(#attrs),* @@ -81,9 +81,9 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { retry.replace(LitInt::new("0", Span::mixed_site())); } - Ok(Args { - retry: retry.expect("`retry` parameter is requred"), - seed: seed.expect("`initial_seed` parameter is required"), + Ok(Self { + retry: retry.expect("`retry` parameter is required"), + seed: seed.expect("`seed` parameter is required"), seed_fn, }) } @@ -98,10 +98,8 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { let seed_fn = seed_fn.iter(); let func = parse_macro_input!(input as ItemFn); - let attrs = func.attrs.clone(); - let name = func.sig.ident.clone(); - - // dbg!(quote::ToTokens::into_token_stream(&func)); + let attrs = &func.attrs; + let name = &func.sig.ident; TokenStream::from(quote! { #[test] diff --git a/cli/src/tests/query_test.rs b/cli/src/tests/query_test.rs index 7d01c26..82ca1b4 100644 --- a/cli/src/tests/query_test.rs +++ b/cli/src/tests/query_test.rs @@ -1,18 +1,25 @@ -use super::helpers::{ - allocations, - fixtures::get_language, - query_helpers::{Match, Pattern}, - ITERATION_COUNT, -}; +use std::{env, fmt::Write}; + +use indoc::indoc; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; -use std::{env, fmt::Write}; use tree_sitter::{ - CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, - QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, + CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError, + QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty, }; use unindent::Unindent; +use super::helpers::{ + allocations, + fixtures::{get_language, get_test_language}, + query_helpers::{assert_query_matches, Match, Pattern}, + ITERATION_COUNT, +}; +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::query_helpers::{collect_captures, collect_matches}, +}; + lazy_static! 
{
     static ref EXAMPLE_FILTER: Option<String> =
         env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
 }
 
@@ -22,16 +29,16 @@
 fn test_query_errors_on_invalid_syntax() {
     allocations::record(|| {
         let language = get_language("javascript");
 
-        assert!(Query::new(language, "(if_statement)").is_ok());
+        assert!(Query::new(&language, "(if_statement)").is_ok());
         assert!(Query::new(
-            language,
+            &language,
             "(if_statement condition:(parenthesized_expression (identifier)))"
         )
         .is_ok());
 
         // Mismatched parens
         assert_eq!(
-            Query::new(language, "(if_statement").unwrap_err().message,
+            Query::new(&language, "(if_statement").unwrap_err().message,
             [
                 "(if_statement", //
                 " ^",
             ]
             .join("\n")
         );
         assert_eq!(
-            Query::new(language, "; comment 1\n; comment 2\n (if_statement))")
+            Query::new(&language, "; comment 1\n; comment 2\n (if_statement))")
                 .unwrap_err()
                 .message,
             [
                 " (if_statement))", //
                 " ^",
             ]
             .join("\n")
         );
 
         // Return an error at the *beginning* of a bare identifier not followed by a colon.
         // If there's a colon but no pattern, return an error at the end of the colon.
         assert_eq!(
-            Query::new(language, "(if_statement identifier)")
+            Query::new(&language, "(if_statement identifier)")
                 .unwrap_err()
                 .message,
             [
                 "(if_statement identifier)", //
                 " ^",
             ]
             .join("\n")
         );
         assert_eq!(
-            Query::new(language, "(if_statement condition:)")
+            Query::new(&language, "(if_statement condition:)")
                 .unwrap_err()
                 .message,
             [
                 "(if_statement condition:)", //
                 " ^",
             ]
             .join("\n")
         );
 
         // Return an error at the beginning of an unterminated string.
         assert_eq!(
-            Query::new(language, r#"(identifier) "h "#)
+            Query::new(&language, r#"(identifier) "h "#)
                 .unwrap_err()
                 .message,
             [
                 r#"(identifier) "h "#, //
-                r#" ^"#,
+                r" ^",
             ]
             .join("\n")
         );
 
         // Empty tree pattern
         assert_eq!(
-            Query::new(language, r#"((identifier) ()"#)
+            Query::new(&language, r"((identifier) ()")
                 .unwrap_err()
                 .message,
             [
                 "((identifier) ()", //
                 " ^",
             ]
             .join("\n")
         );
 
         // Empty alternation
         assert_eq!(
-            Query::new(language, r#"((identifier) [])"#)
+            Query::new(&language, r"((identifier) [])")
                 .unwrap_err()
                 .message,
             [
                 "((identifier) [])", //
                 " ^",
             ]
             .join("\n")
         );
 
         // Unclosed sibling expression with predicate
         assert_eq!(
-            Query::new(language, r#"((identifier) (#a)"#)
+            Query::new(&language, r"((identifier) (#a)")
                 .unwrap_err()
                 .message,
             [
                 "((identifier) (#a)", //
                 " ^",
             ]
             .join("\n")
         );
 
         // Unclosed predicate
         assert_eq!(
-            Query::new(language, r#"((identifier) @x (#eq? @x a"#)
+            Query::new(&language, r"((identifier) @x (#eq? @x a")
                 .unwrap_err()
                 .message,
             [
-                r#"((identifier) @x (#eq? @x a"#,
-                r#" ^"#,
+                r"((identifier) @x (#eq? @x a",
+                r" ^",
             ]
             .join("\n")
         );
 
         // Need at least one child node for a child anchor
         assert_eq!(
-            Query::new(language, r#"(statement_block .)"#)
+            Query::new(&language, r"(statement_block .)")
                 .unwrap_err()
                 .message,
             [
                 //
-                r#"(statement_block .)"#,
-                r#" ^"#
+                r"(statement_block .)",
+                r" ^"
             ]
             .join("\n")
         );
 
         // Need a field name after a negated field operator
         assert_eq!(
-            Query::new(language, r#"(statement_block ! (if_statement))"#)
+            Query::new(&language, r"(statement_block ! 
(if_statement))", + r" ^" ] .join("\n") ); @@ -160,12 +167,12 @@ fn test_query_errors_on_invalid_syntax() { // Unclosed alternation within a tree // tree-sitter/tree-sitter/issues/968 assert_eq!( - Query::new(get_language("c"), r#"(parameter_list [ ")" @foo)"#) + Query::new(&get_language("c"), r#"(parameter_list [ ")" @foo)"#) .unwrap_err() .message, [ r#"(parameter_list [ ")" @foo)"#, - r#" ^"# + r" ^" ] .join("\n") ); @@ -174,14 +181,14 @@ fn test_query_errors_on_invalid_syntax() { // tree-sitter/tree-sitter/issues/1436 assert_eq!( Query::new( - get_language("python"), - r#"[(unary_operator (_) @operand) (not_operator (_) @operand]"# + &get_language("python"), + r"[(unary_operator (_) @operand) (not_operator (_) @operand]" ) .unwrap_err() .message, [ - r#"[(unary_operator (_) @operand) (not_operator (_) @operand]"#, - r#" ^"# + r"[(unary_operator (_) @operand) (not_operator (_) @operand]", + r" ^" ] .join("\n") ); @@ -194,7 +201,7 @@ fn test_query_errors_on_invalid_symbols() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "(clas)").unwrap_err(), + Query::new(&language, "(clas)").unwrap_err(), QueryError { row: 0, offset: 1, @@ -204,7 +211,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + Query::new(&language, "(if_statement (arrayyyyy))").unwrap_err(), QueryError { row: 0, offset: 15, @@ -214,7 +221,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + Query::new(&language, "(if_statement condition: (non_existent3))").unwrap_err(), QueryError { row: 0, offset: 26, @@ -224,7 +231,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + Query::new(&language, "(if_statement condit: (identifier))").unwrap_err(), QueryError { row: 0, offset: 14, @@ -234,7 +241,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + Query::new(&language, "(if_statement conditioning: (identifier))").unwrap_err(), QueryError { row: 0, offset: 14, @@ -244,7 +251,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement !alternativ)").unwrap_err(), + Query::new(&language, "(if_statement !alternativ)").unwrap_err(), QueryError { row: 0, offset: 15, @@ -254,7 +261,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement !alternatives)").unwrap_err(), + Query::new(&language, "(if_statement !alternatives)").unwrap_err(), QueryError { row: 0, offset: 15, @@ -272,7 +279,7 @@ fn test_query_errors_on_invalid_predicates() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "((identifier) @id (@id))").unwrap_err(), + Query::new(&language, "((identifier) @id (@id))").unwrap_err(), QueryError { kind: QueryErrorKind::Syntax, row: 0, @@ -286,7 +293,7 @@ fn test_query_errors_on_invalid_predicates() { } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + Query::new(&language, "((identifier) @id (#eq? @id))").unwrap_err(), QueryError { kind: QueryErrorKind::Predicate, row: 0, @@ -297,7 +304,7 @@ fn test_query_errors_on_invalid_predicates() { } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id @ok))").unwrap_err(), + Query::new(&language, "((identifier) @id (#eq? 
@id @ok))").unwrap_err(), QueryError { kind: QueryErrorKind::Capture, row: 0, @@ -317,29 +324,29 @@ fn test_query_errors_on_impossible_patterns() { allocations::record(|| { assert_eq!( Query::new( - js_lang, - "(binary_expression left: (identifier) left: (identifier))" + &js_lang, + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))" ), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, - offset: 38, - column: 38, + offset: 51, + column: 51, message: [ - "(binary_expression left: (identifier) left: (identifier))", - " ^" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", + " ^", ] .join("\n"), }) ); Query::new( - js_lang, + &js_lang, "(function_declaration name: (identifier) (statement_block))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(function_declaration name: (statement_block))"), + Query::new(&js_lang, "(function_declaration name: (statement_block))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -353,9 +360,9 @@ fn test_query_errors_on_impossible_patterns() { }) ); - Query::new(rb_lang, "(call receiver:(call))").unwrap(); + Query::new(&rb_lang, "(call receiver:(call))").unwrap(); assert_eq!( - Query::new(rb_lang, "(call receiver:(binary))"), + Query::new(&rb_lang, "(call receiver:(binary))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -370,9 +377,9 @@ fn test_query_errors_on_impossible_patterns() { ); Query::new( - js_lang, + &js_lang, "[ - (function (identifier)) + (function_expression (identifier)) (function_declaration (identifier)) (generator_function_declaration (identifier)) ]", @@ -380,9 +387,9 @@ fn test_query_errors_on_impossible_patterns() { .unwrap(); assert_eq!( Query::new( - js_lang, + &js_lang, "[ - (function (identifier)) + (function_expression (identifier)) (function_declaration (object)) (generator_function_declaration (identifier)) ]", @@ -390,7 +397,7 @@ fn test_query_errors_on_impossible_patterns() { Err(QueryError { kind: QueryErrorKind::Structure, row: 2, - offset: 88, + offset: 99, column: 42, message: [ " (function_declaration (object))", // @@ -401,7 +408,7 @@ fn test_query_errors_on_impossible_patterns() { ); assert_eq!( - Query::new(js_lang, "(identifier (identifier))",), + Query::new(&js_lang, "(identifier (identifier))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -415,7 +422,7 @@ fn test_query_errors_on_impossible_patterns() { }) ); assert_eq!( - Query::new(js_lang, "(true (true))",), + Query::new(&js_lang, "(true (true))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -430,21 +437,21 @@ fn test_query_errors_on_impossible_patterns() { ); Query::new( - js_lang, + &js_lang, "(if_statement - condition: (parenthesized_expression (_expression) @cond))", + condition: (parenthesized_expression (expression) @cond))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(if_statement condition: (_expression))",), + Query::new(&js_lang, "(if_statement condition: (expression))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, offset: 14, column: 14, message: [ - "(if_statement condition: (_expression))", // + "(if_statement condition: (expression))", // " ^", ] .join("\n") @@ -456,12 +463,12 @@ fn test_query_errors_on_impossible_patterns() { #[test] fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { allocations::record(|| { - let ruby = get_language("ruby"); + let language = get_language("ruby"); - Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + 
Query::new(&language, "(destructured_parameter (identifier))").unwrap(); assert_eq!( - Query::new(ruby, "(destructured_parameter (string))",), + Query::new(&language, "(destructured_parameter (string))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -482,13 +489,13 @@ fn test_query_matches_with_simple_pattern() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, "(function_declaration name: (identifier) @fn-name)", ) .unwrap(); assert_query_matches( - language, + &language, &query, "function one() { two(); function three() {} }", &[ @@ -504,7 +511,7 @@ fn test_query_matches_with_multiple_on_same_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, "(class_declaration name: (identifier) @the-class-name (class_body @@ -514,7 +521,7 @@ fn test_query_matches_with_multiple_on_same_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " class Person { @@ -550,7 +557,7 @@ fn test_query_matches_with_multiple_patterns_different_roots() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (function_declaration name:(identifier) @fn-def) (call_expression function:(identifier) @fn-ref) @@ -559,7 +566,7 @@ fn test_query_matches_with_multiple_patterns_different_roots() { .unwrap(); assert_query_matches( - language, + &language, &query, " function f1() { @@ -580,11 +587,11 @@ fn test_query_matches_with_multiple_patterns_same_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (pair key: (property_identifier) @method-def - value: (function)) + value: (function_expression)) (pair key: (property_identifier) @method-def @@ -594,7 +601,7 @@ fn test_query_matches_with_multiple_patterns_same_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = { @@ -615,7 +622,7 @@ fn test_query_matches_with_nesting_and_no_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (array (array @@ -626,7 +633,7 @@ fn test_query_matches_with_nesting_and_no_fields() { .unwrap(); assert_query_matches( - language, + &language, &query, " [[a]]; @@ -650,10 +657,10 @@ fn test_query_matches_with_nesting_and_no_fields() { fn test_query_matches_with_many_results() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(array (identifier) @element)").unwrap(); + let query = Query::new(&language, "(array (identifier) @element)").unwrap(); assert_query_matches( - language, + &language, &query, &"[hello];\n".repeat(50), &vec![(0, vec![("element", "hello")]); 50], @@ -666,7 +673,7 @@ fn test_query_matches_with_many_overlapping_results() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (call_expression function: (member_expression @@ -691,7 +698,7 @@ fn test_query_matches_with_many_overlapping_results() { source += &"\n .foo(bar(BAZ))".repeat(count); assert_query_matches( - language, + &language, &query, &source, &[ @@ -713,7 +720,7 @@ fn test_query_matches_capturing_error_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (ERROR (identifier) @the-error-identifier) @the-error ", @@ -721,7 +728,7 @@ fn 
test_query_matches_capturing_error_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, "function a(b,, c, d :e:) {}", &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], ); }); } @@ -734,7 +741,7 @@ fn test_query_matches_with_extra_children() { allocations::record(|| { let language = get_language("ruby"); let query = Query::new( - language, + &language, " (program(comment) @top_level_comment) (argument_list (heredoc_body) @heredoc_in_args) @@ -743,7 +750,7 @@ fn test_query_matches_with_extra_children() { .unwrap(); assert_query_matches( - language, + &language, &query, " # top-level @@ -777,7 +784,7 @@ fn test_query_matches_with_named_wildcard() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (return_statement (_) @the-return-value) (binary_expression operator: _ @the-operator) @@ -788,7 +795,7 @@ fn test_query_matches_with_named_wildcard() { let source = "return a + b - c;"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -809,7 +816,7 @@ fn test_query_matches_with_wildcard_at_the_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (_ (comment) @doc @@ -821,14 +828,14 @@ fn test_query_matches_with_wildcard_at_the_root() { .unwrap(); assert_query_matches( - language, + &language, &query, "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", &[(0, vec![("doc", "/* two */"), ("name", "y")])], ); let query = Query::new( - language, + &language, " (_ (string) @a) (_ (number) @b) @@ -839,7 +846,7 @@ fn test_query_matches_with_wildcard_at_the_root() { .unwrap(); assert_query_matches( - language, + &language, &query, "['hi', x(true), {y: false}]", &[ @@ -851,20 +858,47 @@ fn test_query_matches_with_wildcard_at_the_root() { }); } +#[test] +fn test_query_matches_with_wildcard_within_wildcard() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + &language, + " + (_ (_) @child) @parent + ", + ) + .unwrap(); + + assert_query_matches( + &language, + &query, + "/* a */ b; c;", + &[ + (0, vec![("parent", "/* a */ b; c;"), ("child", "/* a */")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "b;")]), + (0, vec![("parent", "b;"), ("child", "b")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "c;")]), + (0, vec![("parent", "c;"), ("child", "c")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_immediate_siblings() { allocations::record(|| { let language = get_language("python"); // The immediate child operator '.' can be used in three similar ways: - // 1. Before the first child node in a pattern, it means that there cannot be any - // named siblings before that child node. + // 1. Before the first child node in a pattern, it means that there cannot be any named + // siblings before that child node. // 2. After the last child node in a pattern, it means that there cannot be any named // sibling after that child node. - // 2. Between two child nodes in a pattern, it specifies that there cannot be any - // named siblings between those two child snodes. + // 3. Between two child nodes in a pattern, it specifies that there cannot be any named + // siblings between those two child nodes.
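// Editor's sketch (not part of the patch): a standalone program exercising the
// third placement of the immediate-child operator described above. This is a
// minimal sketch assuming the tree-sitter 0.22 Rust API and a hypothetical
// dependency on a `tree_sitter_python` grammar crate exposing `language()`;
// the tests in this file use their own `get_language` fixture helper instead.
use tree_sitter::{Parser, Query, QueryCursor};

fn main() {
    let language = tree_sitter_python::language();
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let source = "import a.b.c";
    let tree = parser.parse(source, None).unwrap();

    // The '.' between the two identifiers requires them to be immediate named
    // siblings, so this matches (a, b) and (b, c) but never (a, c).
    let query = Query::new(
        &language,
        "(dotted_name (identifier) @parent . (identifier) @child)",
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        for capture in m.captures {
            println!(
                "{}: {}",
                query.capture_names()[capture.index as usize],
                &source[capture.node.byte_range()]
            );
        }
    }
}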
let query = Query::new( - language, + &language, " (dotted_name (identifier) @parent @@ -881,7 +915,7 @@ fn test_query_matches_with_immediate_siblings() { .unwrap(); assert_query_matches( - language, + &language, &query, "import a.b.c.d; return [w, [1, y], z]", &[ @@ -895,7 +929,7 @@ fn test_query_matches_with_immediate_siblings() { ); let query = Query::new( - language, + &language, " (block . (_) @first-stmt) (block (_) @stmt) @@ -905,7 +939,7 @@ fn test_query_matches_with_immediate_siblings() { .unwrap(); assert_query_matches( - language, + &language, &query, " if a: @@ -935,7 +969,7 @@ fn test_query_matches_with_last_named_child() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, "(compound_statement (_) (_) @@ -944,7 +978,7 @@ fn test_query_matches_with_last_named_child() { ) .unwrap(); assert_query_matches( - language, + &language, &query, " void one() { a; b; c; } @@ -961,7 +995,7 @@ fn test_query_matches_with_negated_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (import_specifier !alias @@ -994,7 +1028,7 @@ fn test_query_matches_with_negated_fields() { ) .unwrap(); assert_query_matches( - language, + &language, &query, " import {a as b, c} from 'p1'; @@ -1025,9 +1059,9 @@ fn test_query_matches_with_negated_fields() { fn test_query_matches_with_field_at_root() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "name: (identifier) @name").unwrap(); + let query = Query::new(&language, "name: (identifier) @name").unwrap(); assert_query_matches( - language, + &language, &query, " a(); @@ -1045,7 +1079,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ( (comment)+ @doc @@ -1065,7 +1099,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " // one @@ -1106,14 +1140,14 @@ fn test_query_matches_with_repeated_leaf_nodes() { fn test_query_matches_with_optional_nodes_inside_of_repetitions() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, r#"(array (","? (number) @num)+)"#).unwrap(); + let query = Query::new(&language, r#"(array (","? 
(number) @num)+)"#).unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" var a = [1, 2, 3, 4] - "#, + ", &[( 0, vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], @@ -1127,17 +1161,17 @@ fn test_query_matches_with_top_level_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (comment)+ @doc - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" // a // b // c @@ -1145,7 +1179,7 @@ fn test_query_matches_with_top_level_repetitions() { d() // e - "#, + ", &[ (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), (0, vec![("doc", "// e")]), @@ -1158,17 +1192,26 @@ fn test_query_matches_with_top_level_repetitions() { fn test_query_matches_with_non_terminal_repetitions_within_root() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(_ (expression_statement (identifier) @id)+)").unwrap(); + let query = Query::new(&language, "(_ (expression_statement (identifier) @id)+)").unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" + function f() { + d; + e; + f; + g; + } a; b; c; - "#, - &[(0, vec![("id", "a"), ("id", "b"), ("id", "c")])], + ", + &[ + (0, vec![("id", "d"), ("id", "e"), ("id", "f"), ("id", "g")]), + (0, vec![("id", "a"), ("id", "b"), ("id", "c")]), + ], ); }); } @@ -1178,7 +1221,7 @@ fn test_query_matches_with_nested_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (variable_declaration (","? (variable_declarator name: (identifier) @x))+)+ @@ -1187,15 +1230,15 @@ fn test_query_matches_with_nested_repetitions() { .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" var a = b, c, d var e, f // more var g - "#, + ", &[ ( 0, @@ -1215,8 +1258,8 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat // When this query sees a comment, it must keep track of several potential // matches: up to two for each pattern that begins with a comment. 
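// Editor's sketch (not part of the patch): the bookkeeping described above is
// bounded by the cursor's match limit. `set_match_limit` caps how many
// in-progress matches a `QueryCursor` keeps, and `did_exceed_match_limit`
// reports whether any were discarded, as the tests in this file exercise.
// A minimal sketch assuming the tree-sitter 0.22 API and a hypothetical
// `tree_sitter_rust` grammar crate exposing `language()`.
use tree_sitter::{Parser, Query, QueryCursor};

fn main() {
    let language = tree_sitter_rust::language();
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    // Each comment can either extend a pending `(line_comment)*` repetition
    // or begin a new one, so runs of comments multiply the pending states.
    let source = "// a\n// b\n// c\nfn f() {}\n".repeat(16);
    let tree = parser.parse(&source, None).unwrap();

    let query = Query::new(
        &language,
        "((line_comment)* @doc (function_item))
         (line_comment) @comment",
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    cursor.set_match_limit(8); // bound the in-progress match states
    let count = cursor
        .matches(&query, tree.root_node(), source.as_bytes())
        .count();
    println!(
        "matches: {count}, dropped in-progress matches: {}",
        cursor.did_exceed_match_limit()
    );
}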
let query = Query::new( - language, - r#" + &language, + r" (call_expression function: (member_expression property: (property_identifier) @name)) @ref.method @@ -1229,7 +1272,7 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat ((comment)* @doc (method_definition)) (comment) @comment - "#, + ", ) .unwrap(); @@ -1242,7 +1285,7 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat ); assert_query_matches( - language, + &language, &query, &source, &vec![(7, vec![("comment", "// the comment")]); 64] @@ -1262,7 +1305,7 @@ fn test_query_matches_with_trailing_repetitions_of_last_child() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (unary_expression (primary_expression)+ @operand) ", @@ -1270,7 +1313,7 @@ fn test_query_matches_with_trailing_repetitions_of_last_child() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = typeof (!b && ~c); @@ -1290,7 +1333,7 @@ fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ( (comment)* @doc @@ -1303,7 +1346,7 @@ fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " function a() { @@ -1343,7 +1386,7 @@ fn test_query_matches_with_trailing_optional_nodes() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (class_declaration name: (identifier) @class @@ -1353,10 +1396,15 @@ fn test_query_matches_with_trailing_optional_nodes() { ) .unwrap(); - assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]); + assert_query_matches( + &language, + &query, + "class A {}", + &[(0, vec![("class", "A")])], + ); assert_query_matches( - language, + &language, &query, " class A {} @@ -1377,9 +1425,10 @@ fn test_query_matches_with_nested_optional_nodes() { allocations::record(|| { let language = get_language("javascript"); - // A function call, optionally containing a function call, which optionally contains a number + // A function call, optionally containing a function call, which optionally contains a + // number let query = Query::new( - language, + &language, " (call_expression function: (identifier) @outer-fn @@ -1393,13 +1442,13 @@ fn test_query_matches_with_nested_optional_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" a(b, c(), d(null, 1, 2)) e() f(g()) - "#, + ", &[ (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), (0, vec![("outer-fn", "c")]), @@ -1419,7 +1468,7 @@ fn test_query_matches_with_repeated_internal_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (_ (method_definition @@ -1430,7 +1479,7 @@ fn test_query_matches_with_repeated_internal_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " class A { @@ -1441,7 +1490,7 @@ fn test_query_matches_with_repeated_internal_nodes() { ", &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], ); - }) + }); } #[test] @@ -1449,17 +1498,17 @@ fn test_query_matches_with_simple_alternatives() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (pair key: [(property_identifier) (string)] @key - value: [(function) @val1 (arrow_function) @val2]) + value: [(function_expression) @val1 (arrow_function) @val2]) ", ) 
.unwrap(); assert_query_matches( - language, + &language, &query, " a = { @@ -1480,7 +1529,7 @@ fn test_query_matches_with_simple_alternatives() { (0, vec![("key", "'l'"), ("val1", "function m() {}")]), ], ); - }) + }); } #[test] @@ -1488,7 +1537,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (array [(identifier) (string)] @el @@ -1503,7 +1552,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = [b, 'c', d, 1, e, 'f', 'g', h]; @@ -1516,7 +1565,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { ), ], ); - }) + }); } #[test] @@ -1524,7 +1573,7 @@ fn test_query_matches_with_alternatives_at_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" [ "if" @@ -1538,7 +1587,7 @@ fn test_query_matches_with_alternatives_at_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " function a(b, c, d) { @@ -1557,7 +1606,7 @@ fn test_query_matches_with_alternatives_at_root() { (0, vec![("keyword", "throw")]), ], ); - }) + }); } #[test] @@ -1565,19 +1614,19 @@ fn test_query_matches_with_alternatives_under_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (assignment_expression left: [ (identifier) @variable (member_expression property: (property_identifier) @variable) ]) - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = b; @@ -1603,10 +1652,10 @@ fn test_query_matches_in_language_with_simple_aliases() { // HTML uses different tokens to track start tags names, end // tag names, script tag names, and style tag names. All of // these tokens are aliased to `tag_name`. - let query = Query::new(language, "(tag_name) @tag").unwrap(); + let query = Query::new(&language, "(tag_name) @tag").unwrap(); assert_query_matches( - language, + &language, &query, "
@@ -1633,7 +1682,7 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { // and one with higher precedence for generics. let language = get_language("rust"); let query = Query::new( - language, + &language, r#" "<" @less ">" @greater @@ -1642,7 +1691,7 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { .unwrap(); assert_query_matches( - language, + &language, &query, "const A: B = d < e || f > g;", &[ @@ -1660,7 +1709,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (array (identifier) @pre (identifier) @post) ", @@ -1672,7 +1721,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { source.push_str("];"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); @@ -1685,7 +1734,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str())[0], (0, vec![("pre", "hello"), ("post", "hello")]), ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); }); } @@ -1694,7 +1743,7 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" ("{" @open "}" @close) @@ -1733,8 +1782,8 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -1754,7 +1803,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ( (comment) @doc @@ -1774,7 +1823,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { let source = "/* hi */ a.b(); ".repeat(50); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); @@ -1784,7 +1833,54 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str()), vec![(1, vec![("method", "b")]); 50], ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); + }); +} + +#[test] +fn test_repetitions_before_with_alternatives() { + allocations::record(|| { + let language = get_language("rust"); + let query = Query::new( + &language, + r" + ( + (line_comment)* @comment + . 
+ [ + (struct_item name: (_) @name) + (function_item name: (_) @name) + (enum_item name: (_) @name) + (impl_item type: (_) @name) + ] + ) + ", + ) + .unwrap(); + + assert_query_matches( + &language, + &query, + r" + // a + // b + fn c() {} + + // d + // e + impl F {} + ", + &[ + ( + 0, + vec![("comment", "// a"), ("comment", "// b"), ("name", "c")], + ), + ( + 0, + vec![("comment", "// d"), ("comment", "// e"), ("name", "F")], + ), + ], + ); }); } @@ -1793,7 +1889,7 @@ fn test_query_matches_with_anonymous_tokens() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ";" @punctuation "&&" @operator @@ -1803,7 +1899,7 @@ fn test_query_matches_with_anonymous_tokens() { .unwrap(); assert_query_matches( - language, + &language, &query, r#"foo(a && "b");"#, &[ @@ -1821,8 +1917,8 @@ fn test_query_matches_with_supertypes() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, - r#" + &language, + r" (argument_list (expression) @arg) (keyword_argument @@ -1832,12 +1928,12 @@ fn test_query_matches_with_supertypes() { left: (identifier) @var_def) (primary_expression/identifier) @var_ref - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = b.c( @@ -1859,16 +1955,17 @@ fn test_query_matches_with_supertypes() { } #[test] +#[allow(clippy::reversed_empty_ranges)] fn test_query_matches_within_byte_range() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(identifier) @element").unwrap(); + let query = Query::new(&language, "(identifier) @element").unwrap(); let source = "[a, b, c, d, e, f, g]"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -1917,7 +2014,7 @@ fn test_query_matches_within_byte_range() { fn test_query_matches_within_point_range() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(identifier) @element").unwrap(); + let query = Query::new(&language, "(identifier) @element").unwrap(); let source = " [ @@ -1932,7 +2029,7 @@ fn test_query_matches_within_point_range() { .unindent(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -1983,7 +2080,7 @@ fn test_query_captures_within_byte_range() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, " (call_expression function: (identifier) @function @@ -1997,8 +2094,8 @@ fn test_query_captures_within_byte_range() { let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = @@ -2017,12 +2114,79 @@ fn test_query_captures_within_byte_range() { }); } +#[test] +fn test_query_cursor_next_capture_with_byte_range() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + &language, + "(function_definition name: (identifier) @function) + (attribute attribute: 
(identifier) @property) + ((identifier) @variable)", + ) + .unwrap(); + + let source = "def func():\n foo.bar.baz()\n"; + // ^ ^ ^ ^ + // byte_pos 0 12 17 27 + // point_pos (0,0) (1,0) (1,5) (1,15) + + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(12..17) + .captures(&query, tree.root_node(), source.as_bytes()); + + assert_eq!( + collect_captures(captures, &query, source), + &[("variable", "foo"),] + ); + }); +} + +#[test] +fn test_query_cursor_next_capture_with_point_range() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + &language, + "(function_definition name: (identifier) @function) + (attribute attribute: (identifier) @property) + ((identifier) @variable)", + ) + .unwrap(); + + let source = "def func():\n foo.bar.baz()\n"; + // ^ ^ ^ ^ + // byte_pos 0 12 17 27 + // point_pos (0,0) (1,0) (1,5) (1,15) + + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = cursor + .set_point_range(Point::new(1, 0)..Point::new(1, 5)) + .captures(&query, tree.root_node(), source.as_bytes()); + + assert_eq!( + collect_captures(captures, &query, source), + &[("variable", "foo"),] + ); + }); +} + #[test] fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" ("{" @left "}" @right) ("<" @left ">" @right) @@ -2033,8 +2197,8 @@ fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { let source = "mod a { fn a(f: B) { g(f) } }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); // within the type parameter list @@ -2076,7 +2240,7 @@ fn test_query_matches_with_wildcard_at_root_intersecting_byte_range() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " [ (_ body: (block)) @@ -2097,7 +2261,7 @@ fn test_query_matches_with_wildcard_at_root_intersecting_byte_range() { .trim(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -2138,7 +2302,7 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" (function_item name: (identifier) @fn_name) @@ -2171,17 +2335,17 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); // Retrieve some captures let mut results = Vec::new(); for (mat, capture_ix) in captures.by_ref().take(5) { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - 
query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2202,9 +2366,9 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { results.clear(); captures.set_byte_range(source.find("Ok").unwrap()..source.len()); for (mat, capture_ix) in captures { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2224,7 +2388,7 @@ fn test_query_matches_within_range_of_long_repetition() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, " (function_item name: (identifier) @fn-name) ", @@ -2251,7 +2415,7 @@ fn test_query_matches_within_range_of_long_repetition() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let matches = cursor @@ -2275,14 +2439,14 @@ fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { let language = get_language("javascript"); let query1 = Query::new( - language, + &language, " (array (identifier) @id1) ", ) .unwrap(); let query2 = Query::new( - language, + &language, " (array (identifier) @id1) (pair (identifier) @id2) @@ -2290,7 +2454,7 @@ fn test_query_matches_different_queries_same_cursor() { ) .unwrap(); let query3 = Query::new( - language, + &language, " (array (identifier) @id1) (pair (identifier) @id2) @@ -2304,8 +2468,8 @@ fn test_query_matches_different_queries_same_cursor() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query1, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2336,7 +2500,7 @@ fn test_query_matches_with_multiple_captures_on_a_node() { allocations::record(|| { let language = get_language("javascript"); let mut query = Query::new( - language, + &language, "(function_declaration (identifier) @name1 @name2 @name3 (statement_block) @body1 @body2)", @@ -2347,8 +2511,8 @@ fn test_query_matches_with_multiple_captures_on_a_node() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2389,7 +2553,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " ; captured wildcard at the root (_ [ @@ -2435,8 +2599,8 @@ fn test_query_matches_with_captured_wildcard_at_root() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let match_capture_names_and_rows = cursor .matches(&query, tree.root_node(), source.as_bytes()) @@ -2445,7 +2609,7 @@ fn 
test_query_matches_with_captured_wildcard_at_root() { .iter() .map(|c| { ( - query.capture_names()[c.index as usize].as_str(), + query.capture_names()[c.index as usize], c.node.kind(), c.node.start_position().row, ) @@ -2467,7 +2631,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { vec![("stmt", "try_statement", 7), ("block", "block", 12)], vec![("stmt", "while_statement", 1), ("block", "block", 14)], ] - ) + ); }); } @@ -2476,16 +2640,16 @@ fn test_query_matches_with_no_captures() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (identifier) (string) @s - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = 'hi'; @@ -2506,13 +2670,13 @@ fn test_query_matches_with_repeated_fields() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, "(field_declaration declarator: (field_identifier) @field)", ) .unwrap(); assert_query_matches( - language, + &language, &query, " struct S { @@ -2533,7 +2697,7 @@ fn test_query_matches_with_deeply_nested_patterns_with_fields() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " (call function: (_) @func @@ -2560,7 +2724,7 @@ fn test_query_matches_with_deeply_nested_patterns_with_fields() { .unwrap(); assert_query_matches( - language, + &language, &query, " a(1).b(2).c(3).d(4).e(5).f(6).g(7).h(8) @@ -2628,7 +2792,7 @@ fn test_query_matches_with_indefinite_step_containing_no_captures() { // https://github.com/tree-sitter/tree-sitter/issues/937 let language = get_language("c"); let query = Query::new( - language, + &language, "(struct_specifier name: (type_identifier) @name body: (field_declaration_list @@ -2638,7 +2802,7 @@ fn test_query_matches_with_indefinite_step_containing_no_captures() { .unwrap(); assert_query_matches( - language, + &language, &query, " struct LacksUnionField { @@ -2671,16 +2835,16 @@ fn test_query_captures_basic() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (pair key: _ @method.def - (function + (function_expression name: (identifier) @method.alias)) (variable_declarator name: _ @function.def - value: (function + value: (function_expression name: (identifier) @function.alias)) ":" @delimiter @@ -2701,8 +2865,8 @@ fn test_query_captures_basic() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -2746,7 +2910,7 @@ fn test_query_captures_with_text_conditions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ((identifier) @constant (#match? @constant "^[A-Z]{2,}$")) @@ -2757,6 +2921,14 @@ fn test_query_captures_with_text_conditions() { ((identifier) @function.builtin (#eq? @function.builtin "require")) + ((identifier) @variable.builtin + (#any-of? @variable.builtin + "arguments" + "module" + "console" + "window" + "document")) + ((identifier) @variable (#not-match? 
@variable "^(lambda|load)$")) "#, @@ -2770,11 +2942,14 @@ fn test_query_captures_with_text_conditions() { lambda const ab = require('./ab'); new Cd(EF); + document; + module; + console; "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -2791,6 +2966,12 @@ fn test_query_captures_with_text_conditions() { ("constant", "EF"), ("constructor", "EF"), ("variable", "EF"), + ("variable.builtin", "document"), + ("variable", "document"), + ("variable.builtin", "module"), + ("variable", "module"), + ("variable.builtin", "console"), + ("variable", "console"), ], ); }); @@ -2802,8 +2983,8 @@ fn test_query_captures_with_predicates() { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" ((call_expression (identifier) @foo) (#set! name something) (#set! cool) @@ -2811,7 +2992,7 @@ fn test_query_captures_with_predicates() { ((property_identifier) @bar (#is? cool) - (#is-not? name something))"#, + (#is-not? name something))", ) .unwrap(); @@ -2829,7 +3010,8 @@ fn test_query_captures_with_predicates() { args: vec![ QueryPredicateArg::Capture(0), QueryPredicateArg::String("omg".to_string().into_boxed_str()), - ], + ] + .into_boxed_slice(), },] ); assert_eq!(query.property_settings(1), &[]); @@ -2841,6 +3023,26 @@ fn test_query_captures_with_predicates() { (QueryProperty::new("name", Some("something"), None), false), ] ); + + let source = "const a = window.b"; + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let query = Query::new( + &language, + r#"((identifier) @variable.builtin + (#match? @variable.builtin "^(arguments|module|console|window|document)$") + (#is-not? local)) + "#, + ) + .unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let matches = collect_matches(matches, &query, source); + + assert_eq!(matches, &[(0, vec![("variable.builtin", "window")])]); }); } @@ -2854,7 +3056,7 @@ fn test_query_captures_with_quoted_predicate_args() { // * escaped double quotes with \* // * literal backslashes with \\ let query = Query::new( - language, + &language, r#" ((call_expression (identifier) @foo) (#set! one "\"something\ngreat\"")) @@ -2896,14 +3098,14 @@ fn test_query_captures_with_duplicates() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (variable_declarator name: (identifier) @function - value: (function)) + value: (function_expression)) (identifier) @variable - "#, + ", ) .unwrap(); @@ -2912,8 +3114,8 @@ fn test_query_captures_with_duplicates() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -2931,7 +3133,7 @@ fn test_query_captures_with_many_nested_results_without_fields() { // Search for key-value pairs whose values are anonymous functions. 
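// Editor's sketch (not part of the patch): how the #set!, #is?, and #is-not?
// directives exercised in the predicate tests above surface through the query
// API. Properties are not evaluated by tree-sitter itself; the embedding
// application reads them per pattern and applies its own semantics. A minimal
// sketch assuming the tree-sitter 0.22 API and a hypothetical
// `tree_sitter_javascript` grammar crate exposing `language()`.
use tree_sitter::Query;

fn main() {
    let language = tree_sitter_javascript::language();
    let query = Query::new(
        &language,
        "((identifier) @id
          (#set! kind variable)
          (#is-not? local))",
    )
    .unwrap();

    // #set! pairs come back as properties attached to the pattern...
    for prop in query.property_settings(0) {
        println!("set {} = {:?}", prop.key, prop.value);
    }

    // ...while #is? / #is-not? come back as (property, positive) assertions.
    for (prop, positive) in query.property_predicates(0) {
        println!("assert {} (positive: {positive})", prop.key);
    }
}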
let query = Query::new( - language, + &language, r#" (pair key: _ @method-def @@ -2951,12 +3153,12 @@ fn test_query_captures_with_many_nested_results_without_fields() { let method_count = 50; let mut source = "x = { y: {\n".to_owned(); for i in 0..method_count { - writeln!(&mut source, " method{}: $ => null,", i).unwrap(); + writeln!(&mut source, " method{i}: $ => null,").unwrap(); } source.push_str("}};\n"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -2994,15 +3196,15 @@ fn test_query_captures_with_many_nested_results_with_fields() { // Search expressions like `a ? a.b : null` let query = Query::new( - language, - r#" + &language, + r" ((ternary_expression condition: (identifier) @left consequence: (member_expression object: (identifier) @right) alternative: (null)) (#eq? @left @right)) - "#, + ", ) .unwrap(); @@ -3011,12 +3213,12 @@ fn test_query_captures_with_many_nested_results_with_fields() { let count = 50; let mut source = "a ? {".to_owned(); for i in 0..count { - writeln!(&mut source, " x: y{} ? y{}.z : null,", i, i).unwrap(); + writeln!(&mut source, " x: y{i} ? y{i}.z : null,").unwrap(); } source.push_str("} : null;\n"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -3068,15 +3270,15 @@ fn test_query_captures_with_too_many_nested_results() { // appearance. // 2. This pattern captures the root `call_expression`. // 3. This pattern's result also depends on the final child (the template string). - // 4. In between the `call_expression` and the possible `template_string`, there can - // be an arbitrarily deep subtree. + // 4. In between the `call_expression` and the possible `template_string`, there can be an + // arbitrarily deep subtree. // // This means that, if any patterns match *after* the initial `call_expression` is // captured, but before the final `template_string` is found, those matches must // be buffered, in order to prevent captures from being returned out-of-order. 
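// Editor's sketch (not part of the patch): the buffering described above is
// what keeps `captures` output ordered, and it is bounded by the cursor's
// match limit; when the limit is hit, in-progress matches are discarded and
// `did_exceed_match_limit` reports it. A minimal sketch assuming the
// tree-sitter 0.22 API and a hypothetical `tree_sitter_javascript` grammar
// crate exposing `language()`.
use tree_sitter::{Parser, Query, QueryCursor};

fn main() {
    let language = tree_sitter_javascript::language();
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();

    let source = "a(b(c(d(e`f`))))";
    let tree = parser.parse(source, None).unwrap();

    // An "easy" pattern plus a "hard" one whose final child decides the match,
    // mirroring the pair used in the test below.
    let query = Query::new(
        &language,
        "(call_expression function: (identifier) @fn) @call
         (call_expression arguments: (template_string)) @template-call",
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    cursor.set_match_limit(4); // a small limit forces early flushing
    let mut last_start = 0;
    for (m, i) in cursor.captures(&query, tree.root_node(), source.as_bytes()) {
        let node = m.captures[i].node;
        assert!(node.start_byte() >= last_start, "captures stay in order");
        last_start = node.start_byte();
    }
    println!(
        "dropped in-progress matches: {}",
        cursor.did_exceed_match_limit()
    );
}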
let query = Query::new( - language, - r#" + &language, + r" ;; easy 👇 (call_expression function: (member_expression @@ -3087,7 +3289,7 @@ fn test_query_captures_with_too_many_nested_results() { function: (member_expression property: (property_identifier) @template-tag) arguments: (template_string)) @template-call - "#, + ", ) .unwrap(); @@ -3114,12 +3316,12 @@ fn test_query_captures_with_too_many_nested_results() { .trim(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - let captures = collect_captures(captures, &query, &source); + let captures = collect_captures(captures, &query, source); assert_eq!( &captures[0..4], @@ -3151,7 +3353,7 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (array "[" @l-bracket @@ -3177,18 +3379,17 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); assert_eq!( collect_captures(captures, &query, source), - [("l-bracket", "[")] - .iter() + std::iter::once(&("l-bracket", "[")) .chain([("dot", "."); 40].iter()) - .chain([("r-bracket", "]")].iter()) - .cloned() + .chain(std::iter::once(&("r-bracket", "]"))) + .copied() .collect::>(), ); }); @@ -3199,12 +3400,12 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (call_expression) @call (member_expression) @member (identifier) @variable - "#, + ", ) .unwrap(); @@ -3213,8 +3414,8 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3239,13 +3440,13 @@ fn test_query_captures_with_matches_removed() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (binary_expression left: (identifier) @left operator: _ @op right: (identifier) @right) - "#, + ", ) .unwrap(); @@ -3254,8 +3455,8 @@ fn test_query_captures_with_matches_removed() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3283,7 +3484,7 @@ fn test_query_captures_with_matches_removed_before_they_finish() { // namespace_import node always has "*", "as" and then an identifier // for children, so captures will be emitted eagerly for 
this pattern. let query = Query::new( - language, + &language, r#" (namespace_import "*" @star @@ -3298,8 +3499,8 @@ fn test_query_captures_with_matches_removed_before_they_finish() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3325,10 +3526,10 @@ fn test_query_captures_and_matches_iterators_are_fused() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (comment) @comment - "#, + ", ) .unwrap(); @@ -3340,8 +3541,8 @@ fn test_query_captures_and_matches_iterators_are_fused() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3368,7 +3569,7 @@ fn test_query_text_callback_returns_chunks() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ((identifier) @leading_upper (#match? @leading_upper "^[A-Z][A-Z_]*[a-z]")) @@ -3414,8 +3615,8 @@ fn test_query_text_callback_returns_chunks() { ); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), |node: Node| { chunks_in_range(node.byte_range()) @@ -3467,7 +3668,7 @@ fn test_query_start_byte_for_pattern() { source += patterns_2; source += patterns_3; - let query = Query::new(language, &source).unwrap(); + let query = Query::new(&language, &source).unwrap(); assert_eq!(query.start_byte_for_pattern(0), 0); assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); @@ -3482,7 +3683,7 @@ fn test_query_capture_names() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (if_statement condition: (parenthesized_expression (binary_expression @@ -3499,12 +3700,7 @@ fn test_query_capture_names() { assert_eq!( query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] + ["left-operand", "right-operand", "body", "loop-condition"] ); }); } @@ -3512,13 +3708,13 @@ fn test_query_capture_names() { #[test] fn test_query_lifetime_is_separate_from_nodes_lifetime() { allocations::record(|| { - let query = r#"(call_expression) @call"#; + let query = r"(call_expression) @call"; let source = "a(1); b(2);"; let language = get_language("javascript"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); fn take_first_node_from_captures<'tree>( source: &str, @@ -3528,7 +3724,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { // Following 2 lines are redundant but needed to demonstrate // more understandable compiler error message let language = get_language("javascript"); - let query = 
Query::new(language, query).unwrap(); + let query = Query::new(&language, query).unwrap(); let mut cursor = QueryCursor::new(); let node = cursor .matches(&query, node, source.as_bytes()) @@ -3548,7 +3744,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { node: Node<'tree>, ) -> Node<'tree> { let language = get_language("javascript"); - let query = Query::new(language, query).unwrap(); + let query = Query::new(&language, query).unwrap(); let mut cursor = QueryCursor::new(); let node = cursor .captures(&query, node, source.as_bytes()) @@ -3569,7 +3765,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { fn test_query_with_no_patterns() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "").unwrap(); + let query = Query::new(&language, "").unwrap(); assert!(query.capture_names().is_empty()); assert_eq!(query.pattern_count(), 0); }); @@ -3580,7 +3776,7 @@ fn test_query_comments() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ; this is my first comment ; i have two comments here @@ -3593,7 +3789,7 @@ fn test_query_comments() { let source = "function one() { }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -3609,7 +3805,7 @@ fn test_query_disable_pattern() { allocations::record(|| { let language = get_language("javascript"); let mut query = Query::new( - language, + &language, " (function_declaration name: (identifier) @name) @@ -3629,7 +3825,7 @@ fn test_query_disable_pattern() { let source = "class A { constructor() {} } function b() { return 1; }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -3648,7 +3844,7 @@ fn test_query_alternative_predicate_prefix() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, r#" ((call_expression function: (identifier) @keyword @@ -3668,7 +3864,7 @@ fn test_query_alternative_predicate_prefix() { } "#; assert_query_matches( - language, + &language, &query, source, &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], @@ -3683,7 +3879,7 @@ fn test_query_random() { allocations::record(|| { let language = get_language("rust"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(64); @@ -3704,7 +3900,7 @@ fn test_query_random() { let pattern = pattern_ast.to_string(); let expected_matches = pattern_ast.matches_in_tree(&test_tree); - let query = match Query::new(language, &pattern) { + let query = match Query::new(&language, &pattern) { Ok(query) => query, Err(e) => { panic!("failed to build query for pattern {pattern} - {e}. 
seed: {seed}"); @@ -3721,7 +3917,7 @@ fn test_query_random() { captures: mat .captures .iter() - .map(|c| (query.capture_names()[c.index as usize].as_str(), c.node)) + .map(|c| (query.capture_names()[c.index as usize], c.node)) .collect::>(), }) .collect::>(); @@ -3753,7 +3949,7 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "no guaranteed steps", language: get_language("python"), - pattern: r#"(expression_statement (string))"#, + pattern: r"(expression_statement (string))", results_by_substring: &[("expression_statement", false), ("string", false)], }, Row { @@ -3831,17 +4027,17 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "a guaranteed step with a field", language: get_language("javascript"), - pattern: r#"(binary_expression left: (identifier) right: (_))"#, + pattern: r"(binary_expression left: (expression) right: (_))", results_by_substring: &[ ("binary_expression", false), - ("(identifier)", false), + ("(expression)", false), ("(_)", true), ], }, Row { description: "multiple guaranteed steps with fields", language: get_language("javascript"), - pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + pattern: r"(function_declaration name: (identifier) body: (statement_block))", results_by_substring: &[ ("function_declaration", false), ("identifier", true), @@ -3881,12 +4077,12 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "nesting, no guaranteed steps", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression function: (member_expression property: (property_identifier) @template-tag) arguments: (template_string)) @template-call - "#, + ", results_by_substring: &[("property_identifier", false), ("template_string", false)], }, Row { @@ -3901,7 +4097,7 @@ fn test_query_is_pattern_guaranteed_at_step() { "#, results_by_substring: &[ ("identifier", false), - ("property_identifier", true), + ("property_identifier", false), ("[", true), ], }, @@ -3925,15 +4121,15 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "alternation where one branch has guaranteed steps", language: get_language("javascript"), - pattern: r#" + pattern: r" [ (unary_expression (identifier)) (call_expression function: (_) arguments: (_)) - (binary_expression right:(call_expression)) + (binary_expression right: (call_expression)) ] - "#, + ", results_by_substring: &[ ("identifier", false), ("right:", false), @@ -3978,53 +4174,56 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "hidden nodes that have several fields", language: get_language("java"), - pattern: r#" + pattern: r" (method_declaration name: (identifier)) - "#, + ", results_by_substring: &[("name:", true)], }, Row { description: "top-level non-terminal extra nodes", language: get_language("ruby"), - pattern: r#" + pattern: r" (heredoc_body (interpolation) (heredoc_end) @end) - "#, + ", results_by_substring: &[ ("(heredoc_body", false), ("(interpolation)", false), ("(heredoc_end)", true), ], }, - Row { - description: "multiple extra nodes", - language: get_language("rust"), - pattern: r#" - (call_expression - (line_comment) @a - (line_comment) @b - (arguments)) - "#, - results_by_substring: &[ - ("(line_comment) @a", false), - ("(line_comment) @b", false), - ("(arguments)", true), - ], - }, + // TODO: figure out why line comments, an extra, are no longer allowed *anywhere* + // likely culprits are the fact that it's no longer a token itself or that it uses an + // external token + // Row { + // 
description: "multiple extra nodes", + // language: get_language("rust"), + // pattern: r" + // (call_expression + // (line_comment) @a + // (line_comment) @b + // (arguments)) + // ", + // results_by_substring: &[ + // ("(line_comment) @a", false), + // ("(line_comment) @b", false), + // ("(arguments)", true), + // ], + // }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); - for row in rows.iter() { + for row in rows { if let Some(filter) = EXAMPLE_FILTER.as_ref() { if !row.description.contains(filter.as_str()) { continue; } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); for (substring, is_definite) in row.results_by_substring { let offset = row.pattern.find(substring).unwrap(); assert_eq!( @@ -4038,7 +4237,7 @@ fn test_query_is_pattern_guaranteed_at_step() { .join(" "), substring, is_definite, - ) + ); } } }); @@ -4055,12 +4254,12 @@ fn test_query_is_pattern_rooted() { let rows = [ Row { description: "simple token", - pattern: r#"(identifier)"#, + pattern: r"(identifier)", is_rooted: true, }, Row { description: "simple non-terminal", - pattern: r#"(function_definition name: (identifier))"#, + pattern: r"(function_definition name: (identifier))", is_rooted: true, }, Row { @@ -4070,11 +4269,11 @@ fn test_query_is_pattern_rooted() { }, Row { description: "alternative of many non-terminals", - pattern: r#"[ + pattern: r"[ (function_definition name: (identifier)) (class_definition name: (identifier)) (block) - ]"#, + ]", is_rooted: true, }, Row { @@ -4084,7 +4283,7 @@ fn test_query_is_pattern_rooted() { }, Row { description: "top-level repetition", - pattern: r#"(comment)*"#, + pattern: r"(comment)*", is_rooted: false, }, Row { @@ -4099,18 +4298,18 @@ fn test_query_is_pattern_rooted() { }, Row { description: "alternative where one option has a top-level repetition", - pattern: r#"[ + pattern: r"[ (block) (class_definition) (comment)* (function_definition) - ]"#, + ]", is_rooted: false, }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); let language = get_language("python"); for row in &rows { @@ -4120,7 +4319,7 @@ fn test_query_is_pattern_rooted() { } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(language, row.pattern).unwrap(); + let query = Query::new(&language, row.pattern).unwrap(); assert_eq!( query.is_pattern_rooted(0), row.is_rooted, @@ -4130,7 +4329,7 @@ fn test_query_is_pattern_rooted() { .split_ascii_whitespace() .collect::>() .join(" "), - ) + ); } }); } @@ -4147,25 +4346,25 @@ fn test_query_is_pattern_non_local() { let rows = [ Row { description: "simple token", - pattern: r#"(identifier)"#, + pattern: r"(identifier)", language: get_language("python"), is_non_local: false, }, Row { description: "siblings that can occur in an argument list", - pattern: r#"((identifier) (identifier))"#, + pattern: r"((identifier) (identifier))", language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a statement block", - pattern: r#"((return_statement) (return_statement))"#, + pattern: r"((return_statement) (return_statement))", language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a source file", - pattern: r#"((function_definition) (class_definition))"#, + pattern: r"((function_definition) (class_definition))", language: get_language("python"), is_non_local: true, }, @@ -4183,32 +4382,32 @@ fn 
test_query_is_pattern_non_local() { }, Row { description: "siblings that can occur in a class body, wildcard root", - pattern: r#"(_ (method_definition) (method_definition)) @foo"#, + pattern: r"(_ (method_definition) (method_definition)) @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "top-level repetitions that can occur in a class body", - pattern: r#"(method_definition)+ @foo"#, + pattern: r"(method_definition)+ @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "top-level repetitions that can occur in a statement block", - pattern: r#"(return_statement)+ @foo"#, + pattern: r"(return_statement)+ @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "rooted pattern that can occur in a statement block", - pattern: r#"(return_statement) @foo"#, + pattern: r"(return_statement) @foo", language: get_language("javascript"), is_non_local: false, }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); for row in &rows { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -4217,7 +4416,7 @@ fn test_query_is_pattern_non_local() { } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); assert_eq!( query.is_pattern_non_local(0), row.is_non_local, @@ -4227,7 +4426,7 @@ fn test_query_is_pattern_non_local() { .split_ascii_whitespace() .collect::>() .join(" "), - ) + ); } }); } @@ -4246,17 +4445,17 @@ fn test_capture_quantifiers() { Row { description: "Top level capture", language: get_language("python"), - pattern: r#" + pattern: r" (module) @mod - "#, + ", capture_quantifiers: &[(0, "mod", CaptureQuantifier::One)], }, Row { description: "Nested list capture capture", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (_)* @elems) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "elems", CaptureQuantifier::ZeroOrMore), @@ -4265,9 +4464,9 @@ fn test_capture_quantifiers() { Row { description: "Nested non-empty list capture capture", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (_)+ @elems) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "elems", CaptureQuantifier::OneOrMore), @@ -4277,9 +4476,9 @@ fn test_capture_quantifiers() { Row { description: "capture nested in optional pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_) @arg))? @call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::ZeroOrOne), @@ -4289,9 +4488,9 @@ fn test_capture_quantifiers() { Row { description: "optional capture nested in non-empty list pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_)? @arg))+ @call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::OneOrMore), @@ -4301,9 +4500,9 @@ fn test_capture_quantifiers() { Row { description: "non-empty list capture nested in optional pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_)+ @args))? 
@call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::ZeroOrOne), @@ -4314,19 +4513,19 @@ fn test_capture_quantifiers() { Row { description: "capture is the same in all alternatives", language: get_language("javascript"), - pattern: r#"[ + pattern: r"[ (function_declaration name:(identifier) @name) (call_expression function:(identifier) @name) - ]"#, + ]", capture_quantifiers: &[(0, "name", CaptureQuantifier::One)], }, Row { description: "capture appears in some alternatives", language: get_language("javascript"), - pattern: r#"[ + pattern: r"[ (function_declaration name:(identifier) @name) - (function) - ] @fun"#, + (function_expression) + ] @fun", capture_quantifiers: &[ (0, "fun", CaptureQuantifier::One), (0, "name", CaptureQuantifier::ZeroOrOne), @@ -4335,10 +4534,10 @@ fn test_capture_quantifiers() { Row { description: "capture has different quantifiers in alternatives", language: get_language("javascript"), - pattern: r#"[ - (call_expression arguments:(arguments (_)+ @args)) - (new_expression arguments:(arguments (_)? @args)) - ] @call"#, + pattern: r"[ + (call_expression arguments: (arguments (_)+ @args)) + (new_expression arguments: (arguments (_)? @args)) + ] @call", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "args", CaptureQuantifier::ZeroOrMore), @@ -4348,9 +4547,9 @@ fn test_capture_quantifiers() { Row { description: "siblings have different captures with different quantifiers", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression (arguments (identifier)? @self (_)* @args)) @call - "#, + ", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "self", CaptureQuantifier::ZeroOrOne), @@ -4360,9 +4559,9 @@ fn test_capture_quantifiers() { Row { description: "siblings have same capture with different quantifiers", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression (arguments (identifier) @args (_)* @args)) @call - "#, + ", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "args", CaptureQuantifier::OneOrMore), @@ -4372,7 +4571,7 @@ fn test_capture_quantifiers() { Row { description: "combined nesting, alternatives, and siblings", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments [ @@ -4381,7 +4580,7 @@ fn test_capture_quantifiers() { ]) )+ @call ) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::OneOrMore), @@ -4393,12 +4592,12 @@ fn test_capture_quantifiers() { Row { description: "multiple patterns", language: get_language("javascript"), - pattern: r#" + pattern: r" (function_declaration name: (identifier) @x) (statement_identifier) @y (property_identifier)+ @z (array (identifier)* @x) - "#, + ", capture_quantifiers: &[ // x (0, "x", CaptureQuantifier::One), @@ -4420,7 +4619,7 @@ fn test_capture_quantifiers() { Row { description: "multiple alternatives", language: get_language("javascript"), - pattern: r#" + pattern: r" [ (array (identifier) @x) (function_declaration name: (identifier)+ @x) @@ -4429,7 +4628,7 @@ fn test_capture_quantifiers() { (array (identifier) @x) (function_declaration name: (identifier)+ @x) ] - "#, + ", capture_quantifiers: &[ (0, "x", CaptureQuantifier::OneOrMore), (1, "x", CaptureQuantifier::OneOrMore), @@ -4438,16 +4637,16 @@ fn test_capture_quantifiers() { ]; allocations::record(|| { - eprintln!(""); + eprintln!(); - for row in rows.iter() { + for row in rows { if let 
Some(filter) = EXAMPLE_FILTER.as_ref() { if !row.description.contains(filter.as_str()) { continue; } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); for (pattern, capture, expected_quantifier) in row.capture_quantifiers { let index = query.capture_index_for_name(capture).unwrap(); let actual_quantifier = query.capture_quantifiers(*pattern)[index as usize]; @@ -4463,61 +4662,448 @@ fn test_capture_quantifiers() { capture, *expected_quantifier, actual_quantifier, - ) + ); } } }); } -fn assert_query_matches( - language: Language, - query: &Query, - source: &str, - expected: &[(usize, Vec<(&str, &str)>)], -) { +#[test] +fn test_query_quantified_captures() { + struct Row { + description: &'static str, + language: Language, + code: &'static str, + pattern: &'static str, + captures: &'static [(&'static str, &'static str)], + } + + // #[rustfmt::skip] + let rows = &[ + Row { + description: "doc comments where all must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ], + }, + Row { + description: "doc comments where one must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#any-match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ("comment.documentation", "/// qux"), + ("comment.documentation", "/// quux"), + ("comment.documentation", "// quuz"), + ], + }, + ]; + + allocations::record(|| { + for row in rows { + eprintln!(" quantified query example: {:?}", row.description); + + let mut parser = Parser::new(); + parser.set_language(&row.language).unwrap(); + let tree = parser.parse(row.code, None).unwrap(); + + let query = Query::new(&row.language, row.pattern).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes()); + + assert_eq!(collect_captures(matches, &query, row.code), row.captures); + } + }); +} + +#[test] +fn test_query_max_start_depth() { + struct Row { + description: &'static str, + pattern: &'static str, + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! 
{" + if (a1 && a2) { + if (b1 && b2) { } + if (c) { } + } + if (d) { + if (e1 && e2) { } + if (f) { } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + description: "depth 0: match translation unit", + depth: 0, + pattern: r" + (translation_unit) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}\nif (d) {\n if (e1 && e2) { }\n if (f) { }\n}\n")]), + ] + }, + Row { + description: "depth 0: match none", + depth: 0, + pattern: r" + (if_statement) @capture + ", + matches: &[] + }, + Row { + description: "depth 1: match 2 if statements at the top level", + depth: 1, + pattern: r" + (if_statement) @capture + ", + matches : &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (d) {\n if (e1 && e2) { }\n if (f) { }\n}")]), + ] + }, + Row { + description: "depth 1 with deep pattern: match the only the first if statement", + depth: 1, + pattern: r" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + ] + }, + Row { + description: "depth 3 with deep pattern: match all if statements with a binexpr condition", + depth: 3, + pattern: r" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (b1 && b2) { }")]), + (0, &[("capture", "if (e1 && e2) { }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + for row in rows { + eprintln!(" query example: {:?}", row.description); + + let query = Query::new(&language, row.pattern).unwrap(); + cursor.set_max_start_depth(Some(row.depth)); + + let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); +} + +#[test] +fn test_query_error_does_not_oob() { + let language = get_language("javascript"); + + assert_eq!( + Query::new(&language, "(clas").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } + ); +} + +#[test] +fn test_consecutive_zero_or_modifiers() { + let language = get_language("javascript"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!(collect_matches(matches, &query, source), expected); - assert_eq!(cursor.did_exceed_match_limit(), false); + parser.set_language(&language).unwrap(); + + let zero_source = ""; + let three_source = "/**/ /**/ /**/"; + + let zero_tree = parser.parse(zero_source, None).unwrap(); + let three_tree = parser.parse(three_source, None).unwrap(); + + let tests = [ + "(comment)*** @capture", + "(comment)??? @capture", + "(comment)*?* @capture", + "(comment)?*? 
@capture", + ]; + + for test in tests { + let query = Query::new(&language, test).unwrap(); + + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, zero_tree.root_node(), zero_source.as_bytes()); + assert!(matches.next().is_some()); + + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, three_tree.root_node(), three_source.as_bytes()); + + let mut len_3 = false; + let mut len_1 = false; + + for m in matches { + if m.captures.len() == 3 { + len_3 = true; + } + if m.captures.len() == 1 { + len_1 = true; + } + } + + assert_eq!(len_3, test.contains('*')); + assert_eq!(len_1, test.contains("???")); + } } -fn collect_matches<'a>( - matches: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { - matches - .map(|m| { - ( - m.pattern_index, - format_captures(m.captures.iter().cloned(), query, source), - ) - }) - .collect() +#[test] +fn test_query_max_start_depth_more() { + struct Row { + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! {" + { + { } + { + { } + } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + depth: 0, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]) + ] + }, + Row { + depth: 1, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]) + ] + }, + Row { + depth: 2, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]), + (0, &[("capture", "{ }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let query = Query::new(&language, "(compound_statement) @capture").unwrap(); + + let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let node = matches.next().unwrap().captures[0].node; + assert_eq!(node.kind(), "compound_statement"); + + for row in rows { + eprintln!(" depth: {}", row.depth); + + cursor.set_max_start_depth(Some(row.depth)); + + let matches = cursor.matches(&query, node, source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); +} + +#[test] +fn test_grammar_with_aliased_literal_query() { + // module.exports = grammar({ + // name: 'test', + // + // rules: { + // source: $ => repeat(choice($.compound_statement, $.expansion)), + // + // compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), + // + // expansion: $ => seq('}'), + // }, + // }); + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test", + "rules": { + "source": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "compound_statement" + }, + { + "type": "SYMBOL", + "name": "expansion" + } + ] + } + }, + "compound_statement": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "STRING", + "value": "}" + } + } + }, + "named": false, + "value": "}" + } + ] + }, + "expansion": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "}" + } + ] + } + } + } + "#, + ) + .unwrap(); + + let language = 
get_test_language(&parser_name, &parser_code, None); + + let query = Query::new( + &language, + r#" + (compound_statement "}" @bracket1) + (expansion "}" @bracket2) + "#, + ); + + assert!(query.is_ok()); } -fn collect_captures<'a>( - captures: impl Iterator, usize)>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +#[test] +fn test_query_with_first_child_in_group_is_anchor() { + let language = get_language("c"); + let source_code = r"void fun(int a, char b, int c) { };"; + let query = r#" + (parameter_list + . + ((parameter_declaration) @constant + (#match? @constant "^int")))"#; + let query = Query::new(&language, query).unwrap(); + assert_query_matches( + &language, + &query, + source_code, + &[(0, vec![("constant", "int a")])], + ); } -fn format_captures<'a>( - captures: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - captures - .map(|capture| { - ( - query.capture_names()[capture.index as usize].as_str(), - capture.node.utf8_text(source.as_bytes()).unwrap(), - ) - }) - .collect() +// This test needs be executed with UBSAN enabled to check for regressions: +// ``` +// UBSAN_OPTIONS="halt_on_error=1" \ +// CFLAGS="-fsanitize=undefined" \ +// RUSTFLAGS="-lubsan" \ +// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1 +// ``` +#[test] +fn test_query_compiler_oob_access() { + let language = get_language("java"); + // UBSAN should not report any OOB access + assert!(Query::new(&language, "(package_declaration _ (_) @name _)").is_ok()); } diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 07e5d1d..cb07fb7 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -1,15 +1,17 @@ -use super::helpers::{ - allocations, - fixtures::{get_language, get_language_queries_path}, -}; use std::{ ffi::{CStr, CString}, fs, ptr, slice, str, }; + use tree_sitter::Point; use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; -const PYTHON_TAG_QUERY: &'static str = r#" +use super::helpers::{ + allocations, + fixtures::{get_language, get_language_queries_path}, +}; + +const PYTHON_TAG_QUERY: &str = r#" ( (function_definition name: (identifier) @name @@ -39,7 +41,7 @@ const PYTHON_TAG_QUERY: &'static str = r#" attribute: (identifier) @name)) @reference.call "#; -const JS_TAG_QUERY: &'static str = r#" +const JS_TAG_QUERY: &str = r#" ( (comment)* @doc . (class_declaration @@ -68,7 +70,7 @@ const JS_TAG_QUERY: &'static str = r#" function: (identifier) @name) @reference.call "#; -const RUBY_TAG_QUERY: &'static str = r#" +const RUBY_TAG_QUERY: &str = r" (method name: (_) @name) @definition.method @@ -79,7 +81,7 @@ const RUBY_TAG_QUERY: &'static str = r#" ((identifier) @name @reference.call (#is-not? local)) -"#; +"; #[test] fn test_tags_python() { @@ -132,7 +134,7 @@ fn test_tags_python() { fn test_tags_javascript() { let language = get_language("javascript"); let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); - let source = br#" + let source = br" // hi // Data about a customer. 
@@ -150,7 +152,7 @@ fn test_tags_javascript() {

     class Agent {

     }
-    "#;
+    ";

     let mut tag_context = TagsContext::new();
     let tags = tag_context
@@ -305,10 +307,10 @@ fn test_tags_with_parse_error() {
     let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
     let mut tag_context = TagsContext::new();

-    let source = br#"
+    let source = br"
     class Fine: pass
     class Bad
-    "#;
+    ";

     let (tags, failed) = tag_context
         .generate_tags(&tags_config, source, None)
@@ -359,25 +361,29 @@ fn test_tags_via_c_api() {
     );

     let c_scope_name = CString::new(scope_name).unwrap();
-    let result = c::ts_tagger_add_language(
-        tagger,
-        c_scope_name.as_ptr(),
-        language,
-        JS_TAG_QUERY.as_ptr(),
-        ptr::null(),
-        JS_TAG_QUERY.len() as u32,
-        0,
-    );
+    let result = unsafe {
+        c::ts_tagger_add_language(
+            tagger,
+            c_scope_name.as_ptr(),
+            language,
+            JS_TAG_QUERY.as_ptr(),
+            ptr::null(),
+            JS_TAG_QUERY.len() as u32,
+            0,
+        )
+    };
     assert_eq!(result, c::TSTagsError::Ok);

-    let result = c::ts_tagger_tag(
-        tagger,
-        c_scope_name.as_ptr(),
-        source_code.as_ptr(),
-        source_code.len() as u32,
-        buffer,
-        ptr::null(),
-    );
+    let result = unsafe {
+        c::ts_tagger_tag(
+            tagger,
+            c_scope_name.as_ptr(),
+            source_code.as_ptr(),
+            source_code.len() as u32,
+            buffer,
+            ptr::null(),
+        )
+    };
     assert_eq!(result, c::TSTagsError::Ok);
     let tags = unsafe {
         slice::from_raw_parts(
@@ -387,20 +393,20 @@ fn test_tags_via_c_api() {
     };
     let docs = str::from_utf8(unsafe {
         slice::from_raw_parts(
-            c::ts_tags_buffer_docs(buffer) as *const u8,
+            c::ts_tags_buffer_docs(buffer).cast::<u8>(),
             c::ts_tags_buffer_docs_len(buffer) as usize,
         )
     })
     .unwrap();

-    let syntax_types: Vec<&str> = unsafe {
-        let mut len: u32 = 0;
+    let syntax_types = unsafe {
+        let mut len = 0;
         let ptr =
             c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
         slice::from_raw_parts(ptr, len as usize)
             .iter()
             .map(|i| CStr::from_ptr(*i).to_str().unwrap())
-            .collect()
+            .collect::<Vec<_>>()
     };

     assert_eq!(
@@ -419,8 +425,10 @@ fn test_tags_via_c_api() {
         ]
     );

-    c::ts_tags_buffer_delete(buffer);
-    c::ts_tagger_delete(tagger);
+    unsafe {
+        c::ts_tags_buffer_delete(buffer);
+        c::ts_tagger_delete(tagger);
+    }
     });
 }
diff --git a/cli/src/tests/test_highlight_test.rs b/cli/src/tests/test_highlight_test.rs
index af2c15c..8699c2a 100644
--- a/cli/src/tests/test_highlight_test.rs
+++ b/cli/src/tests/test_highlight_test.rs
@@ -1,9 +1,12 @@
-use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
-use crate::query_testing::{parse_position_comments, Assertion};
-use crate::test_highlight::get_highlight_positions;
 use tree_sitter::{Parser, Point};
 use tree_sitter_highlight::{Highlight, Highlighter};

+use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
+use crate::{
+    query_testing::{parse_position_comments, Assertion},
+    test_highlight::get_highlight_positions,
+};
+
 #[test]
 fn test_highlight_test_with_basic_test() {
     let language = get_language("javascript");
@@ -12,7 +15,7 @@ fn test_highlight_test_with_basic_test() {
         Some("injections.scm"),
         &[
             "function".to_string(),
-            "variable.parameter".to_string(),
+            "variable".to_string(),
             "keyword".to_string(),
         ],
     );
@@ -22,28 +25,21 @@ fn test_highlight_test_with_basic_test() {
         "  // ^ function",
         "   //     ^ keyword",
         "  return d + e;",
-        "  //     ^ variable.parameter",
+        "  //     ^ variable",
+        "  //         ^ !variable",
         "};",
     ]
     .join("\n");

     let assertions =
-        parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
+        parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
     assert_eq!(
         assertions,
         &[
-            Assertion {
-                position: Point::new(1, 5),
-                expected_capture_name: "function".to_string()
-            },
-            Assertion {
-                position: Point::new(1, 11),
-                expected_capture_name: "keyword".to_string()
-            },
-            Assertion {
-                position: Point::new(4, 9),
-                expected_capture_name: "variable.parameter".to_string()
-            },
+            Assertion::new(1, 5, false, String::from("function")),
+            Assertion::new(1, 11, false, String::from("keyword")),
+            Assertion::new(4, 9, false, String::from("variable")),
+            Assertion::new(4, 11, true, String::from("variable")),
         ]
     );

@@ -60,6 +56,7 @@ fn test_highlight_test_with_basic_test() {
             (Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
             (Point::new(4, 2), Point::new(4, 8), Highlight(2)),   // "return"
             (Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
+            (Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
         ]
     );
 }
diff --git a/cli/src/tests/test_tags_test.rs b/cli/src/tests/test_tags_test.rs
index 61f98ab..5e7bf9c 100644
--- a/cli/src/tests/test_tags_test.rs
+++ b/cli/src/tests/test_tags_test.rs
@@ -1,9 +1,12 @@
-use super::helpers::fixtures::{get_language, get_tags_config};
-use crate::query_testing::{parse_position_comments, Assertion};
-use crate::test_tags::get_tag_positions;
 use tree_sitter::{Parser, Point};
 use tree_sitter_tags::TagsContext;

+use super::helpers::fixtures::{get_language, get_tags_config};
+use crate::{
+    query_testing::{parse_position_comments, Assertion},
+    test_tags::get_tag_positions,
+};
+
 #[test]
 fn test_tags_test_with_basic_test() {
     let language = get_language("python");
@@ -16,28 +19,21 @@ fn test_tags_test_with_basic_test() {
         "   #  ^ reference.call",
         "    return d(e)",
         "   #       ^ reference.call",
+        "   #         ^ !variable.parameter",
         "",
     ]
     .join("\n");

     let assertions =
-        parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
+        parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
     assert_eq!(
         assertions,
         &[
-            Assertion {
-                position: Point::new(1, 4),
-                expected_capture_name: "definition.function".to_string(),
-            },
-            Assertion {
-                position: Point::new(3, 9),
-                expected_capture_name: "reference.call".to_string(),
-            },
-            Assertion {
-                position: Point::new(5, 11),
-                expected_capture_name: "reference.call".to_string(),
-            },
+            Assertion::new(1, 4, false, String::from("definition.function")),
+            Assertion::new(3, 9, false, String::from("reference.call")),
+            Assertion::new(5, 11, false, String::from("reference.call")),
+            Assertion::new(5, 13, true, String::from("variable.parameter")),
         ]
     );

@@ -62,5 +58,5 @@ fn test_tags_test_with_basic_test() {
                 "reference.call".to_string()
             ),
         ]
-    )
+    );
 }
diff --git a/cli/src/tests/text_provider_test.rs b/cli/src/tests/text_provider_test.rs
new file mode 100644
index 0000000..e35e20e
--- /dev/null
+++ b/cli/src/tests/text_provider_test.rs
@@ -0,0 +1,173 @@
+use std::{iter, sync::Arc};
+
+use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
+
+use crate::tests::helpers::fixtures::get_language;
+
+fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
+    let language = get_language("c");
+    let mut parser = Parser::new();
+    parser.set_language(&language).unwrap();
+    (parser.parse(text, None).unwrap(), language)
+}
+
+fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
+where
+    T: AsRef<[u8]>,
+    F: FnMut(usize, Point) -> T,
+{
+    let language = get_language("c");
+    let mut parser = Parser::new();
+    parser.set_language(&language).unwrap();
+    let tree = parser.parse_with(callback, None).unwrap();
+    // eprintln!("{}", tree.clone().root_node().to_sexp());
+    assert_eq!("comment", tree.root_node().child(0).unwrap().kind());
+    (tree, language)
+}
+
+fn tree_query<T: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<T>, language: &Language) {
+    let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
+    let mut cursor = QueryCursor::new();
+    let mut captures = cursor.captures(&query, tree.root_node(), text);
+    let (match_, idx) = captures.next().unwrap();
+    let capture = match_.captures[idx];
+    assert_eq!(capture.index as usize, idx);
+    assert_eq!("comment", capture.node.kind());
+}
+
+fn check_parsing<T: AsRef<[u8]>>(
+    parser_text: impl AsRef<[u8]>,
+    text_provider: impl TextProvider<T>,
+) {
+    let (tree, language) = parse_text(parser_text);
+    tree_query(&tree, text_provider, &language);
+}
+
+fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
+    parser_callback: &mut F,
+    text_provider: impl TextProvider<I>,
+) where
+    T: AsRef<[u8]>,
+    F: FnMut(usize, Point) -> T,
+{
+    let (tree, language) = parse_text_with(parser_callback);
+    tree_query(&tree, text_provider, &language);
+}
+
+#[test]
+fn test_text_provider_for_str_slice() {
+    let text: &str = "// comment";
+
+    check_parsing(text, text.as_bytes());
+    check_parsing(text.as_bytes(), text.as_bytes());
+}
+
+#[test]
+fn test_text_provider_for_string() {
+    let text: String = "// comment".to_owned();
+
+    check_parsing(text.clone(), text.as_bytes());
+    check_parsing(text.as_bytes(), text.as_bytes());
+    check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
+}
+
+#[test]
+fn test_text_provider_for_box_of_str_slice() {
+    let text = "// comment".to_owned().into_boxed_str();
+
+    check_parsing(text.as_bytes(), text.as_bytes());
+    check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
+    check_parsing(text.as_ref(), text.as_ref().as_bytes());
+    check_parsing(text.as_ref(), text.as_bytes());
+}
+
+#[test]
+fn test_text_provider_for_box_of_bytes_slice() {
+    let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
+
+    check_parsing(text.as_ref(), text.as_ref());
+    check_parsing(text.as_ref(), &*text);
+    check_parsing(&*text, &*text);
+}
+
+#[test]
+fn test_text_provider_for_vec_of_bytes() {
+    let text = "// comment".to_owned().into_bytes();
+
+    check_parsing(&*text, &*text);
+}
+
+#[test]
+fn test_text_provider_for_arc_of_bytes_slice() {
+    let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes());
+
+    check_parsing(&*text, &*text);
+    check_parsing(text.as_ref(), text.as_ref());
+    check_parsing(text.clone(), text.as_ref());
+}
+
+#[test]
+fn test_text_provider_callback_with_str_slice() {
+    let text: &str = "// comment";
+
+    check_parsing(text, |_node: Node<'_>| iter::once(text));
+    check_parsing_callback(
+        &mut |offset, _point| {
+            (offset < text.len())
+                .then_some(text.as_bytes())
+                .unwrap_or_default()
+        },
+        |_node: Node<'_>| iter::once(text),
+    );
+}
+
+#[test]
+fn test_text_provider_callback_with_owned_string_slice() {
+    let text: &str = "// comment";
+
+    check_parsing_callback(
+        &mut |offset, _point| {
+            (offset < text.len())
+                .then_some(text.as_bytes())
+                .unwrap_or_default()
+        },
+        |_node: Node<'_>| {
+            let slice: String = text.to_owned();
+            iter::once(slice)
+        },
+    );
+}
+
+#[test]
+fn test_text_provider_callback_with_owned_bytes_vec_slice() {
+    let text: &str = "// comment";
+
+    check_parsing_callback(
+        &mut |offset, _point| {
+            (offset < text.len())
+                .then_some(text.as_bytes())
+                .unwrap_or_default()
+        },
+        |_node: Node<'_>| {
+            let slice = text.to_owned().into_bytes();
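+            // Descriptive note: a TextProvider may yield owned chunks; this
+            // closure hands back one freshly allocated Vec<u8> per requested
+            // node rather than borrowing from `text`.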
iter::once(slice) + }, + ); +} + +#[test] +fn test_text_provider_callback_with_owned_arc_of_bytes_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then_some(text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice: Arc<[u8]> = text.to_owned().into_bytes().into(); + iter::once(slice) + }, + ); +} diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index be0c4ff..793b24a 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -1,13 +1,14 @@ -use super::helpers::edits::invert_edit; -use super::helpers::fixtures::get_language; -use crate::parse::{perform_edit, Edit}; use std::str; + use tree_sitter::{InputEdit, Parser, Point, Range, Tree}; +use super::helpers::{edits::invert_edit, fixtures::get_language}; +use crate::parse::{perform_edit, Edit}; + #[test] fn test_tree_edit() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(" abc !== def", None).unwrap(); assert_eq!( @@ -44,7 +45,7 @@ fn test_tree_edit() { } // edit starting in the tree's padding but extending into its content: - // shrink the content to compenstate for the expanded padding. + // shrink the content to compensate for the expanded padding. { let mut tree = tree.clone(); tree.edit(&InputEdit { @@ -207,7 +208,7 @@ fn test_tree_edit() { // replacement that starts in whitespace and extends beyond the end of the tree: // shift the token's start position and empty out its content. { - let mut tree = tree.clone(); + let mut tree = tree; tree.edit(&InputEdit { start_byte: 6, old_end_byte: 90, @@ -235,7 +236,7 @@ fn test_tree_edit() { #[test] fn test_tree_edit_with_included_ranges() { let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let source = "
<div><% if a %>a<% else %>b<% end %></div>
"; @@ -300,13 +301,13 @@ fn test_tree_edit_with_included_ranges() { #[test] fn test_tree_cursor() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser .parse( " struct Stuff { - a: A; + a: A, b: Option, } ", @@ -322,21 +323,103 @@ fn test_tree_cursor() { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "struct"); - assert_eq!(cursor.node().is_named(), false); + assert!(!cursor.node().is_named()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "type_identifier"); - assert_eq!(cursor.node().is_named(), true); + assert!(cursor.node().is_named()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "field_declaration_list"); - assert_eq!(cursor.node().is_named(), true); + assert!(cursor.node().is_named()); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "}"); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert!(cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert!(cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "{"); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 }); + + let mut copy = tree.walk(); + copy.reset_to(&cursor); + + assert_eq!(copy.node().kind(), "{"); + assert!(!copy.node().is_named()); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "field_declaration_list"); + assert!(copy.node().is_named()); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "struct_item"); +} + +#[test] +fn test_tree_cursor_previous_sibling() { + let mut parser = Parser::new(); + parser.set_language(&get_language("rust")).unwrap(); + + let text = " + // Hi there + // This is fun! + // Another one! +"; + let tree = parser.parse(text, None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Another one!" + ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// This is fun!" 
+ ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Hi there" + ); + + assert!(!cursor.goto_previous_sibling()); } #[test] fn test_tree_cursor_fields() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser .parse("function /*1*/ bar /*2*/ () {}", None) @@ -373,7 +456,7 @@ fn test_tree_cursor_fields() { #[test] fn test_tree_cursor_child_for_point() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let source = &" [ one, @@ -480,7 +563,7 @@ fn test_tree_cursor_child_for_point() { #[test] fn test_tree_node_equality() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse("struct A {}", None).unwrap(); let node1 = tree.root_node(); let node2 = tree.root_node(); @@ -494,7 +577,7 @@ fn test_get_changed_ranges() { let source_code = b"{a: null};\n".to_vec(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(&source_code, None).unwrap(); assert_eq!( @@ -514,11 +597,11 @@ fn test_get_changed_ranges() { inserted_text: b"othing".to_vec(), }; let inverse_edit = invert_edit(&source_code, &edit); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); assert_eq!(ranges, vec![range_of(&source_code, "nothing")]); // Replace `nothing` with `null` - that token has changed syntax - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); assert_eq!(ranges, vec![range_of(&source_code, "null")]); } @@ -534,11 +617,11 @@ fn test_get_changed_ranges() { inserted_text: b"\n".to_vec(), }; let inverse_edit = invert_edit(&source_code, &edit); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); assert_eq!(ranges, vec![]); // Remove leading newline - no changed ranges - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); assert_eq!(ranges, vec![]); } @@ -554,7 +637,7 @@ fn test_get_changed_ranges() { inserted_text: b", b: false".to_vec(), }; let inverse_edit1 = invert_edit(&source_code, &edit1); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1); assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]); let edit2 = Edit { @@ -563,21 +646,21 @@ fn test_get_changed_ranges() { inserted_text: b", c: 1".to_vec(), }; let inverse_edit2 = invert_edit(&source_code, &edit2); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit2); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2); assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]); // Remove the middle pair - let ranges = get_changed_ranges(&mut 
parser, &mut tree, &mut source_code, inverse_edit2);
+        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2);
         assert_eq!(ranges, vec![]);

         // Remove the second pair
-        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1);
+        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
         assert_eq!(ranges, vec![]);
     }

     // Wrapping elements in larger expressions
     {
-        let mut tree = tree.clone();
+        let mut tree = tree;
         let mut source_code = source_code.clone();

         // Replace `null` with the binary expression `b === null`
         let edit1 = Edit {
             position: index_of(&source_code, "null"),
             deleted_length: 0,
             inserted_text: b"b === ".to_vec(),
         };
         let inverse_edit1 = invert_edit(&source_code, &edit1);
-        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1);
+        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1);
         assert_eq!(ranges, vec![range_of(&source_code, "b === null")]);

         // Undo
-        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1);
+        let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1);
         assert_eq!(ranges, vec![range_of(&source_code, "null")]);
     }
 }

-fn index_of(text: &Vec<u8>, substring: &str) -> usize {
-    str::from_utf8(text.as_slice())
-        .unwrap()
-        .find(substring)
-        .unwrap()
+#[test]
+fn test_consistency_with_mid_codepoint_edit() {
+    let mut parser = Parser::new();
+    parser.set_language(&get_language("php/php")).unwrap();
+    let mut source_code =
+        b"<?php
+fn index_of(text: &[u8], substring: &str) -> usize {
+    str::from_utf8(text).unwrap().find(substring).unwrap()
 }

-fn range_of(text: &Vec<u8>, substring: &str) -> Range {
+fn range_of(text: &[u8], substring: &str) -> Range {
     let start_byte = index_of(text, substring);
     let end_byte = start_byte + substring.as_bytes().len();
     Range {
@@ -618,9 +721,9 @@ fn get_changed_ranges(
     parser: &mut Parser,
     tree: &mut Tree,
     source_code: &mut Vec<u8>,
-    edit: Edit,
+    edit: &Edit,
 ) -> Vec<Range> {
-    perform_edit(tree, source_code, &edit);
+    perform_edit(tree, source_code, edit).unwrap();
     let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
     let result = tree.changed_ranges(&new_tree).collect();
     *tree = new_tree;
diff --git a/cli/src/tests/wasm_language_test.rs b/cli/src/tests/wasm_language_test.rs
new file mode 100644
index 0000000..cf36dc5
--- /dev/null
+++ b/cli/src/tests/wasm_language_test.rs
@@ -0,0 +1,254 @@
+use std::fs;
+
+use lazy_static::lazy_static;
+use tree_sitter::{
+    wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
+};
+
+use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
+
+lazy_static! {
+    static ref ENGINE: Engine = Engine::default();
+}
+
+#[test]
+fn test_wasm_stdlib_symbols() {
+    let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();
+    assert_eq!(
+        symbols,
+        {
+            let mut symbols = symbols.clone();
+            symbols.sort_unstable();
+            symbols
+        },
+        "symbols aren't sorted"
+    );
+
+    assert!(symbols.contains(&"malloc"));
+    assert!(symbols.contains(&"free"));
+    assert!(symbols.contains(&"memset"));
+    assert!(symbols.contains(&"memcpy"));
+}
+
+#[test]
+fn test_load_wasm_ruby_language() {
+    allocations::record(|| {
+        let mut store = WasmStore::new(ENGINE.clone()).unwrap();
+        let mut parser = Parser::new();
+        let wasm = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap();
+        let language = store.load_language("ruby", &wasm).unwrap();
+        parser.set_wasm_store(store).unwrap();
+        parser.set_language(&language).unwrap();
+        let tree = parser.parse("class A; end", None).unwrap();
+        assert_eq!(
+            tree.root_node().to_sexp(),
+            "(program (class name: (constant)))"
+        );
+    });
+}
+
+#[test]
+fn test_load_wasm_html_language() {
+    allocations::record(|| {
+        let mut store = WasmStore::new(ENGINE.clone()).unwrap();
+        let mut parser = Parser::new();
+        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
+        let language = store.load_language("html", &wasm).unwrap();
+        parser.set_wasm_store(store).unwrap();
+        parser.set_language(&language).unwrap();
+        let tree = parser
+            .parse("
<div><span></span><span></span></div>
", None) + .unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))))" + ); + }); +} + +#[test] +fn test_load_wasm_rust_language() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + let mut parser = Parser::new(); + let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + let language = store.load_language("rust", &wasm).unwrap(); + parser.set_wasm_store(store).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse("fn main() {}", None).unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"); + }); +} + +#[test] +fn test_load_wasm_javascript_language() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + let mut parser = Parser::new(); + let wasm = fs::read(WASM_DIR.join("tree-sitter-javascript.wasm")).unwrap(); + let language = store.load_language("javascript", &wasm).unwrap(); + parser.set_wasm_store(store).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse("const a = b\nconst c = d", None).unwrap(); + assert_eq!(tree.root_node().to_sexp(), "(program (lexical_declaration (variable_declarator name: (identifier) value: (identifier))) (lexical_declaration (variable_declarator name: (identifier) value: (identifier))))"); + }); +} + +#[test] +fn test_load_multiple_wasm_languages() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + let mut parser = Parser::new(); + + let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap(); + let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap(); + let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); + + let language_rust = store.load_language("rust", &wasm_rs).unwrap(); + let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap(); + let language_ruby = store.load_language("ruby", &wasm_rb).unwrap(); + let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); + parser.set_wasm_store(store).unwrap(); + + let mut parser2 = Parser::new(); + parser2 + .set_wasm_store(WasmStore::new(ENGINE.clone()).unwrap()) + .unwrap(); + let mut query_cursor = QueryCursor::new(); + + // First, parse with the store that originally loaded the languages. + // Then parse with a new parser and wasm store, so that the languages + // are added one-by-one, in between parses. 
+ for mut parser in [parser, parser2] { + for _ in 0..2 { + let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap(); + let query_typescript = + Query::new(&language_typescript, "(class_declaration) @foo").unwrap(); + + parser.set_language(&language_cpp).unwrap(); + let tree = parser.parse("A c = d();", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))" + ); + + parser.set_language(&language_rust).unwrap(); + let source = "const A: B = c();"; + let tree = parser.parse(source, None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))" + ); + assert_eq!( + query_cursor + .matches(&query_rust, tree.root_node(), source.as_bytes()) + .count(), + 1 + ); + + parser.set_language(&language_ruby).unwrap(); + let tree = parser.parse("class A; end", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (class name: (constant)))" + ); + + parser.set_language(&language_typescript).unwrap(); + let tree = parser.parse("class A {}", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (class_declaration name: (type_identifier) body: (class_body)))" + ); + assert_eq!( + query_cursor + .matches(&query_typescript, tree.root_node(), source.as_bytes()) + .count(), + 1 + ); + } + } + }); +} + +#[test] +fn test_load_and_reload_wasm_language() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + + let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); + + let language_rust = store.load_language("rust", &wasm_rust).unwrap(); + let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); + assert_eq!(store.language_count(), 2); + + // When a language is dropped, stores can release their instances of that language. 
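+        // `language_count()` reports how many wasm languages the store still
+        // holds alive, so dropping the last handle to one should decrement it.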
+        drop(language_rust);
+        assert_eq!(store.language_count(), 1);
+
+        let language_rust = store.load_language("rust", &wasm_rust).unwrap();
+        assert_eq!(store.language_count(), 2);
+
+        drop(language_rust);
+        drop(language_typescript);
+        assert_eq!(store.language_count(), 0);
+    });
+}
+
+#[test]
+fn test_load_wasm_errors() {
+    allocations::record(|| {
+        let mut store = WasmStore::new(ENGINE.clone()).unwrap();
+        let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap();
+
+        let bad_wasm = &wasm[1..];
+        assert_eq!(
+            store.load_language("rust", bad_wasm).unwrap_err(),
+            WasmError {
+                kind: WasmErrorKind::Parse,
+                message: "failed to parse dylink section of wasm module".into(),
+            }
+        );
+
+        assert_eq!(
+            store.load_language("not_rust", &wasm).unwrap_err(),
+            WasmError {
+                kind: WasmErrorKind::Instantiate,
+                message: "module did not contain language function: tree_sitter_not_rust".into(),
+            }
+        );
+
+        let mut bad_wasm = wasm.clone();
+        bad_wasm[300..500].iter_mut().for_each(|b| *b = 0);
+        assert_eq!(
+            store.load_language("rust", &bad_wasm).unwrap_err().kind,
+            WasmErrorKind::Compile,
+        );
+    });
+}
+
+#[test]
+fn test_wasm_oom() {
+    allocations::record(|| {
+        let mut store = WasmStore::new(ENGINE.clone()).unwrap();
+        let mut parser = Parser::new();
+        let wasm = fs::read(WASM_DIR.join("tree-sitter-html.wasm")).unwrap();
+        let language = store.load_language("html", &wasm).unwrap();
+        parser.set_wasm_store(store).unwrap();
+        parser.set_language(&language).unwrap();
+
+        let tag_name = "a-b".repeat(2 * 1024 * 1024);
+        let code = format!("<{tag_name}>hello world</{tag_name}>");
+        assert!(parser.parse(&code, None).is_none());
+
+        let tag_name = "a-b".repeat(20);
+        let code = format!("<{tag_name}>hello world</{tag_name}>");
+        parser.set_language(&language).unwrap();
+        let tree = parser.parse(&code, None).unwrap();
+        assert_eq!(
+            tree.root_node().to_sexp(),
+            "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))"
+        );
+    });
+}
diff --git a/cli/src/util.rs b/cli/src/util.rs
index d180cd5..fd4f469 100644
--- a/cli/src/util.rs
+++ b/cli/src/util.rs
@@ -1,18 +1,18 @@
-use anyhow::Result;
-use std::io;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::Arc;
-use std::thread;
-use tree_sitter::{Parser, Tree};
+use std::{
+    path::{Path, PathBuf},
+    process::{Child, ChildStdin, Command, Stdio},
+    sync::{
+        atomic::{AtomicUsize, Ordering},
+        Arc,
+    },
+};

-#[cfg(unix)]
-use anyhow::{anyhow, Context};
-#[cfg(unix)]
-use std::path::PathBuf;
-#[cfg(unix)]
-use std::process::{Child, ChildStdin, Command, Stdio};
+use anyhow::{anyhow, Context, Result};
+use indoc::indoc;
+use tree_sitter::{Parser, Tree};
+use tree_sitter_config::Config;
+use tree_sitter_loader::Config as LoaderConfig;

-#[cfg(unix)]
 const HTML_HEADER: &[u8] = b"
@@ -22,62 +22,72 @@ svg { width: 100%; }
 ";

-pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
+#[must_use]
+pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String {
+    let path = path.display();
+    format!(
+        indoc! {"
+            No language found for path `{}`
+
+            If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n
+            If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {}
+        "},
+        path,
+        path,
+        loader_config
+            .parser_directories
+            .iter()
+            .enumerate()
+            .map(|(i, d)| format!("  {}. {}", i + 1, d.display()))
+            .collect::<Vec<_>>()
+            .join("  \n"),
+        path,
+        if let Ok(Some(config_path)) = Config::find_config_file() {
+            format!("located at {}", config_path.display())
+        } else {
+            String::from("which you need to create by running `tree-sitter init-config`")
+        }
+    )
+}
+
+#[must_use]
+pub fn cancel_on_signal() -> Arc<AtomicUsize> {
     let result = Arc::new(AtomicUsize::new(0));
-    if atty::is(atty::Stream::Stdin) {
-        thread::spawn({
-            let flag = result.clone();
-            move || {
-                let mut line = String::new();
-                io::stdin().read_line(&mut line).unwrap();
-                flag.store(1, Ordering::Relaxed);
-            }
-        });
-    }
+    ctrlc::set_handler({
+        let flag = result.clone();
+        move || {
+            flag.store(1, Ordering::Relaxed);
+        }
+    })
+    .expect("Error setting Ctrl-C handler");
     result
 }

-#[cfg(windows)]
-pub struct LogSession;
-
-#[cfg(unix)]
 pub struct LogSession {
     path: PathBuf,
     dot_process: Option<Child>,
     dot_process_stdin: Option<ChildStdin>,
+    open_log: bool,
 }

-#[cfg(windows)]
-pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> {
-    Ok(())
-}
-
-#[cfg(windows)]
-pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
-    Ok(LogSession)
-}
-
-#[cfg(unix)]
-pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> {
-    let session = LogSession::new(path)?;
+pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> {
+    let session = LogSession::new(path, quiet)?;
     tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
     Ok(())
 }

-#[cfg(unix)]
-pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
-    let session = LogSession::new(path)?;
+pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result<LogSession> {
+    let session = LogSession::new(path, open_log)?;
     parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap());
     Ok(session)
 }

-#[cfg(unix)]
 impl LogSession {
-    fn new(path: &str) -> Result<LogSession> {
+    fn new(path: &str, open_log: bool) -> Result<Self> {
         use std::io::Write;

         let mut dot_file = std::fs::File::create(path)?;
-        dot_file.write(HTML_HEADER)?;
+        dot_file.write_all(HTML_HEADER)?;
         let mut dot_process = Command::new("dot")
             .arg("-Tsvg")
             .stdin(Stdio::piped())
@@ -94,11 +104,11 @@ impl LogSession {
             path: PathBuf::from(path),
             dot_process: Some(dot_process),
             dot_process_stdin: Some(dot_stdin),
+            open_log,
         })
     }
 }

-#[cfg(unix)]
 impl Drop for LogSession {
     fn drop(&mut self) {
         use std::fs;
@@ -106,10 +116,8 @@ impl Drop for LogSession {
         drop(self.dot_process_stdin.take().unwrap());
         let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
         if output.status.success() {
-            if cfg!(target_os = "macos")
-                && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64
-            {
-                Command::new("open").arg(&self.path).output().unwrap();
+            if self.open_log && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 {
+                webbrowser::open(&self.path.to_string_lossy()).unwrap();
             }
         } else {
             eprintln!(
diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs
index 467fef7..7782a33 100644
--- a/cli/src/wasm.rs
+++ b/cli/src/wasm.rs
@@ -1,130 +1,112 @@
-use super::generate::parse_grammar::GrammarJSON;
+use std::{
+    fs,
+    path::{Path, PathBuf},
+};
+
 use anyhow::{anyhow, Context, Result};
-use std::ffi::{OsStr, OsString};
-use std::fs;
-use std::path::Path;
-use std::process::Command;
-use which::which;
+use tree_sitter::wasm_stdlib_symbols;
+use tree_sitter_loader::Loader;
+use wasmparser::Parser;
+
+use super::generate::parse_grammar::GrammarJSON;

-const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
+pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
+    let grammar_name = get_grammar_name(language_dir)
+        .with_context(|| "Failed to get wasm filename")
+        .unwrap();
+    let wasm_filename = format!("tree-sitter-{grammar_name}.wasm");
+    let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
+        format!("Failed to read {wasm_filename}. Run `tree-sitter build --wasm` first.",)
+    })?;
+    Ok((grammar_name, contents))
+}

-pub fn get_grammar_name(src_dir: &Path) -> Result<String> {
+pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
+    let src_dir = language_dir.join("src");
     let grammar_json_path = src_dir.join("grammar.json");
     let grammar_json = fs::read_to_string(&grammar_json_path)
-        .with_context(|| format!("Failed to read grammar file {:?}", grammar_json_path))?;
+        .with_context(|| format!("Failed to read grammar file {grammar_json_path:?}"))?;
     let grammar: GrammarJSON = serde_json::from_str(&grammar_json)
-        .with_context(|| format!("Failed to parse grammar file {:?}", grammar_json_path))?;
+        .with_context(|| format!("Failed to parse grammar file {grammar_json_path:?}"))?;
     Ok(grammar.name)
 }

-pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Result<()> {
-    let src_dir = language_dir.join("src");
-    let grammar_name = get_grammar_name(&src_dir)?;
-    let output_filename = format!("tree-sitter-{}.wasm", grammar_name);
-
-    let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" };
-    let emcc_path = which(emcc_bin)
-        .ok()
-        .and_then(|p| Command::new(&p).output().and(Ok(p)).ok());
-
-    let mut command;
-    if !force_docker && emcc_path.is_some() {
-        command = Command::new(emcc_path.unwrap());
-        command.current_dir(&language_dir);
-    } else if Command::new("docker").output().is_ok() {
-        command = Command::new("docker");
-        command.args(&["run", "--rm"]);
-
-        // Mount the parser directory as a volume
-        let mut volume_string;
-        if let (Some(parent), Some(filename)) = (language_dir.parent(), language_dir.file_name()) {
-            volume_string = OsString::from(parent);
-            volume_string.push(":/src:Z");
-            command.arg("--workdir");
-            command.arg(&Path::new("/src").join(filename));
-        } else {
-            volume_string = OsString::from(language_dir);
-            volume_string.push(":/src:Z");
-            command.args(&["--workdir", "/src"]);
-        }
-
-        command.args(&[OsStr::new("--volume"), &volume_string]);
-
-        // Get the current user id so that files created in the docker container will have
-        // the same owner.
-        if cfg!(unix) {
-            let user_id_output = Command::new("id")
-                .arg("-u")
-                .output()
-                .with_context(|| "Failed to get get current user id")?;
-            let user_id = String::from_utf8_lossy(&user_id_output.stdout);
-            let user_id = user_id.trim();
-            command.args(&["--user", user_id]);
-        }
-
-        // Run `emcc` in a container using the `emscripten-slim` image
-        command.args(&[EMSCRIPTEN_TAG, "emcc"]);
-    } else {
-        if force_docker {
-            return Err(anyhow!(
-                "You must have docker on your PATH to run this command with --docker"
-            ));
-        }
-        return Err(anyhow!(
-            "You must have either emcc or docker on your PATH to run this command"
-        ));
-    }
-
-    command.args(&[
-        "-o",
+pub fn compile_language_to_wasm(
+    loader: &Loader,
+    root_dir: Option<&Path>,
+    language_dir: &Path,
+    output_dir: &Path,
+    output_file: Option<PathBuf>,
+    force_docker: bool,
+) -> Result<()> {
+    let grammar_name = get_grammar_name(language_dir)?;
+    let output_filename =
+        output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm")));
+    let src_path = language_dir.join("src");
+    let scanner_path = loader.get_scanner_path(&src_path);
+    loader.compile_parser_to_wasm(
+        &grammar_name,
+        root_dir,
+        &src_path,
+        scanner_path
+            .as_ref()
+            .and_then(|p| Some(Path::new(p.file_name()?))),
         &output_filename,
-        "-Os",
-        "-s",
-        "WASM=1",
-        "-s",
-        "SIDE_MODULE=1",
-        "-s",
-        "TOTAL_MEMORY=33554432",
-        "-s",
-        "NODEJS_CATCH_EXIT=0",
-        "-s",
-        "NODEJS_CATCH_REJECTION=0",
-        "-s",
-        &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar_name),
-        "-fno-exceptions",
-        "-I",
-        "src",
-    ]);
-
-    let src = Path::new("src");
-    let parser_c_path = src.join("parser.c");
-    let scanner_c_path = src.join("scanner.c");
-    let scanner_cc_path = src.join("scanner.cc");
-    let scanner_cpp_path = src.join("scanner.cpp");
-
-    if language_dir.join(&scanner_cc_path).exists() {
-        command.arg("-xc++").arg(&scanner_cc_path);
-    } else if language_dir.join(&scanner_cpp_path).exists() {
-        command.arg("-xc++").arg(&scanner_cpp_path);
-    } else if language_dir.join(&scanner_c_path).exists() {
-        command.arg(&scanner_c_path);
+        force_docker,
+    )?;
+
+    // Exit with an error if the external scanner uses symbols from the
+    // C or C++ standard libraries that aren't available to wasm parsers.
+    let stdlib_symbols = wasm_stdlib_symbols().collect::<Vec<_>>();
+    let dylink_symbols = [
+        "__indirect_function_table",
+        "__memory_base",
+        "__stack_pointer",
+        "__table_base",
+        "__table_base",
+        "memory",
+    ];
+    let builtin_symbols = [
+        "__assert_fail",
+        "__cxa_atexit",
+        "abort",
+        "emscripten_notify_memory_growth",
+        "tree_sitter_debug_message",
+        "proc_exit",
+    ];
+
+    let mut missing_symbols = Vec::new();
+    let wasm_bytes = fs::read(&output_filename)?;
+    let parser = Parser::new(0);
+    for payload in parser.parse_all(&wasm_bytes) {
+        if let wasmparser::Payload::ImportSection(imports) = payload? {
{ + for import in imports { + let import = import?.name; + if !builtin_symbols.contains(&import) + && !stdlib_symbols.contains(&import) + && !dylink_symbols.contains(&import) + { + missing_symbols.push(import); + } + } + } } - command.arg(&parser_c_path); - - let output = command - .output() - .with_context(|| "Failed to run emcc command")?; - if !output.status.success() { - return Err(anyhow!( - "emcc command failed - {}", - String::from_utf8_lossy(&output.stderr) - )); + if !missing_symbols.is_empty() { + Err(anyhow!( + concat!( + "This external scanner uses a symbol that isn't available to wasm parsers.\n", + "\n", + "Missing symbols:\n", + " {}\n", + "\n", + "Available symbols:\n", + " {}", + ), + missing_symbols.join("\n "), + stdlib_symbols.join("\n ") + ))?; } - // Move the created `.wasm` file into the current working directory. - fs::rename(&language_dir.join(&output_filename), &output_filename) - .with_context(|| format!("Couldn't find output file {:?}", output_filename))?; - Ok(()) } diff --git a/debian/changelog b/debian/changelog index 0921c65..e2b30bc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,18 @@ +tree-sitter (0.22.6-1) experimental; urgency=medium + + * New upstream release + + Fix possible infinite loop when a tree has zero-width tokens (Closes: + #1052020) + * Build librust-tree-sitter{,-tags,-highlight,-loader,-config}-dev and + tree-sitter-cli packages here instead of from their own source packages + * Specify PREFIX and LIBDIR for dh_auto_build + * Update libtree-sitter0.symbols + * Switch package branch to debian/experimental + * Declare compliance with Policy 4.7.0, no changes needed + * Run wrap-and-sort -ast + + -- James McCoy  Fri, 06 Dec 2024 15:48:14 -0500 + tree-sitter (0.20.8-2) unstable; urgency=medium * Upload to unstable diff --git a/debian/clean b/debian/clean new file mode 100644 index 0000000..03314f7 --- /dev/null +++ b/debian/clean @@ -0,0 +1 @@ +Cargo.lock diff --git a/debian/control b/debian/control index 0e5a477..cbfb37a 100644 --- a/debian/control +++ b/debian/control @@ -4,12 +4,57 @@ Maintainer: Tree-sitter Maintainers Uploaders: James McCoy , Build-Depends: + cargo:native, debhelper-compat (= 13), -Standards-Version: 4.6.2 + dh-sequence-rust, + librust-ansi-term-0.12+default-dev (>= 0.12.1), + librust-anstyle-1+default-dev (>= 1.0.4), + librust-anyhow-1+default-dev (>= 1.0.82), + librust-cc-1+default-dev (>= 1.0.95), + librust-clap-4+cargo-dev (>= 4.4.18), + librust-clap-4+derive-dev (>= 4.4.18), + librust-clap-4+env-dev (>= 4.4.18), + librust-clap-4+help-dev (>= 4.4.18), + librust-clap-4+unstable-styles-dev (>= 4.4.18), + librust-ctor+default-dev (<< 0.3) <!nocheck>, + librust-ctrlc-3+termination-dev (>= 3.4.2), + librust-difference-2+default-dev (>= 2.0.0), + librust-dirs-5+default-dev (>= 5.0.1), + librust-filetime-0.2+default-dev (>= 0.2.23), + librust-fs4-0.11+default-dev (>= 0.11.0), + librust-glob-0.3+default-dev (>= 0.3.1), + librust-heck-0.4+default-dev, + librust-html-escape-0.2+default-dev (>= 0.2.12), + librust-indexmap-2+default-dev (>= 2.2.6), + librust-indoc-2+default-dev (>= 2.0.5), + librust-lazy-static-1+default-dev (>= 1.4.0), + librust-libloading-0.8+default-dev (>= 0.8.3), + librust-log-0.4+std-dev (>= 0.4.21), + librust-memchr-2+default-dev (>= 2.7.1), + librust-once-cell-1+default-dev (>= 1.19.0), + librust-pretty-assertions-1+default-dev (>= 1.4.0) <!nocheck>, + librust-rand-0.8+default-dev (>= 0.8.5) <!nocheck>, + librust-regex-1+default-dev (>= 1.10.4), + librust-regex-syntax-0.8+default-dev (>= 0.8.2),
librust-rustc-hash-1+default-dev (>= 1.1.0), + librust-semver-1+default-dev (>= 1.0.21), + librust-serde-1+derive-dev (>= 1.0.198), + librust-serde-derive-1+default-dev (>= 1.0.197), + librust-serde-json-1+preserve-order-dev (>= 1.0.116), + librust-smallbitvec-2+default-dev (>= 2.5.1), + librust-tempfile-3+default-dev (>= 3.10.1), + librust-thiserror-1+default-dev (>= 1.0.59), + librust-tiny-http-0.12+default-dev (>= 0.12.0), + librust-unindent-0.2+default-dev (>= 0.2.3) <!nocheck>, + librust-walkdir-2+default-dev (>= 2.5.0), + librust-webbrowser-dev (>= 0.8), + libstd-rust-dev, + rustc:native (>= 1.74.1), +Standards-Version: 4.7.0 Section: libs Homepage: https://tree-sitter.github.io/tree-sitter/ Rules-Requires-Root: no -Vcs-Git: https://salsa.debian.org/tree-sitter-team/tree-sitter.git +Vcs-Git: https://salsa.debian.org/tree-sitter-team/tree-sitter.git -b debian/experimental Vcs-Browser: https://salsa.debian.org/tree-sitter-team/tree-sitter Package: libtree-sitter-dev @@ -48,3 +93,170 @@ Description: incremental parsing system for programming tools Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. + +Package: librust-tree-sitter-dev +Section: rust +Architecture: any +Multi-Arch: same +Depends: + librust-cc-1+default-dev (>= 1.0.95), + librust-regex-1+default-dev (>= 1.10.4), + rustc (>= 1.65), + ${misc:Depends}, +Provides: + librust-tree-sitter+default-dev (= ${binary:Version}), + librust-tree-sitter-0+default-dev (= ${binary:Version}), + librust-tree-sitter-0-dev (= ${binary:Version}), + librust-tree-sitter-0.22+default-dev (= ${binary:Version}), + librust-tree-sitter-0.22-dev (= ${binary:Version}), + librust-tree-sitter-0.22.6+default-dev (= ${binary:Version}), + librust-tree-sitter-0.22.6-dev (= ${binary:Version}), +Description: bindings to the tree-sitter parsing library - Rust source code + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the source for the Rust tree-sitter crate, + packaged for use with cargo and dh-cargo. + +Package: librust-tree-sitter-config-dev +Section: rust +Architecture: any +Multi-Arch: same +Depends: + librust-anyhow-1+default-dev (>= 1.0.82), + librust-dirs-5+default-dev (>= 5.0.1), + librust-serde-1+derive-dev (>= 1.0.198), + librust-serde-json-1+preserve-order-dev (>= 1.0.116), + rustc (>= 1.74.1), + ${misc:Depends}, +Provides: + librust-tree-sitter-config+default-dev (= ${binary:Version}), + librust-tree-sitter-config-0+default-dev (= ${binary:Version}), + librust-tree-sitter-config-0-dev (= ${binary:Version}), + librust-tree-sitter-config-0.22+default-dev (= ${binary:Version}), + librust-tree-sitter-config-0.22-dev (= ${binary:Version}), + librust-tree-sitter-config-0.22.6+default-dev (= ${binary:Version}), + librust-tree-sitter-config-0.22.6-dev (= ${binary:Version}), +Description: user configuration of tree-sitter's CLI programs - Rust source code + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the source for the Rust tree-sitter-config crate, + packaged for use with cargo and dh-cargo.
+ +Package: librust-tree-sitter-highlight-dev +Section: rust +Architecture: any +Multi-Arch: same +Depends: + librust-lazy-static-1+default-dev (>= 1.4.0), + librust-regex-1+default-dev (>= 1.10.4), + librust-thiserror-1+default-dev (>= 1.0.59), + librust-tree-sitter-0.22+default-dev (>= 0.22.6), + rustc (>= 1.74.1), + ${misc:Depends}, +Provides: + librust-tree-sitter-highlight+default-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0+default-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0.22+default-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0.22-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0.22.6+default-dev (= ${binary:Version}), + librust-tree-sitter-highlight-0.22.6-dev (= ${binary:Version}), +Description: perform syntax highlighting with tree-sitter - Rust source code + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the source for the Rust tree-sitter-highlight crate, + packaged for use with cargo and dh-cargo. + +Package: librust-tree-sitter-loader-dev +Section: rust +Architecture: any +Multi-Arch: same +Depends: + librust-anyhow-1+default-dev (>= 1.0.82), + librust-cc-1+default-dev (>= 1.0.83), + librust-dirs-5+default-dev (>= 5.0.1), + librust-fs4-0.11+default-dev (>= 0.11.0), + librust-indoc-2+default-dev (>= 2.0.5), + librust-libloading-0.8+default-dev (>= 0.8.3), + librust-once-cell-1+default-dev (>= 1.19.0), + librust-regex-1+default-dev (>= 1.10.4), + librust-serde-1+derive-dev (>= 1.0.198), + librust-serde-json-1+preserve-order-dev (>= 1.0.116), + librust-tempfile-3+default-dev (>= 3.10.1), + librust-tree-sitter-0.22+default-dev (>= 0.22.6), + librust-tree-sitter-highlight-0.22+default-dev (>= 0.22.6), + librust-tree-sitter-tags-0.22+default-dev (>= 0.22.6), + rustc (>= 1.74.1), + ${misc:Depends}, +Provides: + librust-tree-sitter-loader+default-dev (= ${binary:Version}), + librust-tree-sitter-loader-0+default-dev (= ${binary:Version}), + librust-tree-sitter-loader-0-dev (= ${binary:Version}), + librust-tree-sitter-loader-0.22+default-dev (= ${binary:Version}), + librust-tree-sitter-loader-0.22-dev (= ${binary:Version}), + librust-tree-sitter-loader-0.22.6+default-dev (= ${binary:Version}), + librust-tree-sitter-loader-0.22.6-dev (= ${binary:Version}), +Description: locates, builds, and loads tree-sitter grammars - Rust source code + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the source for the Rust tree-sitter-loader crate, + packaged for use with cargo and dh-cargo. 
+ +Package: librust-tree-sitter-tags-dev +Section: rust +Architecture: any +Multi-Arch: same +Depends: + librust-memchr-2+default-dev (>= 2.7.2), + librust-regex-1+default-dev (>= 1.10.4), + librust-thiserror-1+default-dev (>= 1.0.59), + librust-tree-sitter-0.22+default-dev (>= 0.22.6), + rustc (>= 1.74.1), + ${misc:Depends}, +Provides: + librust-tree-sitter-tags+default-dev (= ${binary:Version}), + librust-tree-sitter-tags-0+default-dev (= ${binary:Version}), + librust-tree-sitter-tags-0-dev (= ${binary:Version}), + librust-tree-sitter-tags-0.22+default-dev (= ${binary:Version}), + librust-tree-sitter-tags-0.22-dev (= ${binary:Version}), + librust-tree-sitter-tags-0.22.6+default-dev (= ${binary:Version}), + librust-tree-sitter-tags-0.22.6-dev (= ${binary:Version}), +Description: library for extracting tag information - Rust source code + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the source for the Rust tree-sitter-tags crate, + packaged for use with cargo and dh-cargo. + +Package: tree-sitter-cli +Section: devel +Architecture: any +Multi-Arch: allowed +Depends: + ${misc:Depends}, + ${shlibs:Depends}, +Built-Using: + ${cargo:Built-Using}, +Static-Built-Using: + ${cargo:Static-Built-Using}, +Recommends: + emscripten, + gcc | c-compiler, + node-gyp, + nodejs, +Description: command-line tool for Tree-sitter parsers + Tree-sitter is a parser generator tool and an incremental parsing library. It + can build a concrete syntax tree for a source file and efficiently update the + syntax tree as the source file is edited. + . + This package contains the tree-sitter-cli program, which is used to help + generate, build, and test tree-sitter parsers.
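[Editorial note on the `cli/src/wasm.rs` hunk above: the new missing-symbol check boils down to one pass over the module's import section with `wasmparser`. The following is a minimal standalone sketch of that same scan, useful for debugging a rejected scanner by hand. It assumes `wasmparser` ~0.206, the version pinned in the upstream workspace, and the file name `tree-sitter-json.wasm` is a hypothetical example.]

```rust
use std::error::Error;

use wasmparser::{Parser, Payload};

// Collect every symbol a compiled parser module imports, mirroring the
// allow-list check that `tree-sitter build --wasm` performs above.
fn list_imports(wasm_bytes: &[u8]) -> Result<Vec<String>, Box<dyn Error>> {
    let mut imports = Vec::new();
    for payload in Parser::new(0).parse_all(wasm_bytes) {
        // Only the import section matters for the scanner-symbol check.
        if let Payload::ImportSection(section) = payload? {
            for import in section {
                let import = import?;
                imports.push(format!("{}::{}", import.module, import.name));
            }
        }
    }
    Ok(imports)
}

fn main() -> Result<(), Box<dyn Error>> {
    // Hypothetical path; any output of `tree-sitter build --wasm` works here.
    let bytes = std::fs::read("tree-sitter-json.wasm")?;
    for name in list_imports(&bytes)? {
        println!("{name}");
    }
    Ok(())
}
```

[Any import not covered by the stdlib, dylink, or builtin allow-lists is exactly what the new code reports as a missing symbol.]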
diff --git a/debian/copyright b/debian/copyright index 90be8c4..fcef3ce 100644 --- a/debian/copyright +++ b/debian/copyright @@ -3,11 +3,11 @@ Upstream-Name: tree-sitter Source: https://tree-sitter.github.io/tree-sitter/ Files: * -Copyright: 2018-2022 Max Brunsfeld +Copyright: 2018-2024 Max Brunsfeld License: Expat Files: debian/* -Copyright: 2020-2023 James McCoy +Copyright: 2020-2024 James McCoy License: Expat Files: lib/src/unicode/* diff --git a/debian/gbp.conf b/debian/gbp.conf index 39dda2f..53a9f42 100644 --- a/debian/gbp.conf +++ b/debian/gbp.conf @@ -1,2 +1,2 @@ [dch] -debian-branch = debian/main +debian-branch = debian/experimental diff --git a/debian/libtree-sitter0.symbols b/debian/libtree-sitter0.symbols index 9c768cf..45ab937 100644 --- a/debian/libtree-sitter0.symbols +++ b/debian/libtree-sitter0.symbols @@ -1,17 +1,36 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# * Build-Depends-Package: libtree-sitter-dev + (optional)ts_current_calloc@Base 0.21.0 + (optional)ts_current_free@Base 0.21.0 + (optional)ts_current_malloc@Base 0.21.0 + (optional)ts_current_realloc@Base 0.21.0 + ts_language_copy@Base 0.21.0 + ts_language_delete@Base 0.21.0 ts_language_field_count@Base 0.19 ts_language_field_id_for_name@Base 0.19 ts_language_field_name_for_id@Base 0.19 + ts_language_is_wasm@Base 0.20.9 + ts_language_next_state@Base 0.20.9 + ts_language_state_count@Base 0.20.9 ts_language_symbol_count@Base 0.19 ts_language_symbol_for_name@Base 0.19 ts_language_symbol_name@Base 0.19 ts_language_symbol_type@Base 0.19 ts_language_version@Base 0.19 + ts_lookahead_iterator_current_symbol@Base 0.20.9 + ts_lookahead_iterator_current_symbol_name@Base 0.20.9 + ts_lookahead_iterator_delete@Base 0.20.9 + ts_lookahead_iterator_language@Base 0.20.9 + ts_lookahead_iterator_new@Base 0.20.9 + ts_lookahead_iterator_next@Base 0.20.9 + ts_lookahead_iterator_reset@Base 0.20.9 + ts_lookahead_iterator_reset_state@Base 0.20.9 ts_node_child@Base 0.19 ts_node_child_by_field_id@Base 0.19 ts_node_child_by_field_name@Base 0.19 + ts_node_child_containing_descendant@Base 0.22.6 ts_node_child_count@Base 0.19 + ts_node_descendant_count@Base 0.20.9 ts_node_descendant_for_byte_range@Base 0.19 ts_node_descendant_for_point_range@Base 0.19 ts_node_edit@Base 0.19 @@ -21,18 +40,25 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# ts_node_field_name_for_child@Base 0.20 ts_node_first_child_for_byte@Base 0.19 ts_node_first_named_child_for_byte@Base 0.19 + ts_node_grammar_symbol@Base 0.20.9 + ts_node_grammar_type@Base 0.20.9 ts_node_has_changes@Base 0.19 ts_node_has_error@Base 0.19 + ts_node_is_error@Base 0.20.9 ts_node_is_extra@Base 0.19 ts_node_is_missing@Base 0.19 ts_node_is_named@Base 0.19 ts_node_is_null@Base 0.19 + ts_node_language@Base 0.20.9 ts_node_named_child@Base 0.19 ts_node_named_child_count@Base 0.19 ts_node_named_descendant_for_byte_range@Base 0.19 ts_node_named_descendant_for_point_range@Base 0.19 ts_node_next_named_sibling@Base 0.19 + ts_node_next_parse_state@Base 0.20.9 ts_node_next_sibling@Base 0.19 + ts_node_parent@Base 0.19 + ts_node_parse_state@Base 0.20.9 ts_node_prev_named_sibling@Base 0.19 ts_node_prev_sibling@Base 0.19 ts_node_start_byte@Base 0.19 @@ -50,11 +76,14 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# ts_parser_parse_string@Base 0.19 ts_parser_parse_string_encoding@Base 0.19 ts_parser_print_dot_graphs@Base 0.19 + ts_parser_reset@Base 0.19 ts_parser_set_cancellation_flag@Base 0.19 ts_parser_set_included_ranges@Base 0.19 ts_parser_set_language@Base 0.19 ts_parser_set_logger@Base 0.19 
ts_parser_set_timeout_micros@Base 0.19 + ts_parser_set_wasm_store@Base 0.20.9 + ts_parser_take_wasm_store@Base 0.20.9 ts_parser_timeout_micros@Base 0.19 ts_query_capture_count@Base 0.19 ts_query_capture_name_for_id@Base 0.19 @@ -69,6 +98,7 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# ts_query_cursor_remove_match@Base 0.19 ts_query_cursor_set_byte_range@Base 0.19 ts_query_cursor_set_match_limit@Base 0.20 + ts_query_cursor_set_max_start_depth@Base 0.20.9 ts_query_cursor_set_point_range@Base 0.19 ts_query_delete@Base 0.19 ts_query_disable_capture@Base 0.19 @@ -84,18 +114,24 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# ts_query_string_value_for_id@Base 0.19 ts_set_allocator@Base 0.20.2 ts_tree_copy@Base 0.19 - ts_tree_cursor_copy@Base 0.19 + ts_tree_cursor_copy@Base 0.22.4 + ts_tree_cursor_current_depth@Base 0.20.9 + ts_tree_cursor_current_descendant_index@Base 0.20.9 ts_tree_cursor_current_field_id@Base 0.19 ts_tree_cursor_current_field_name@Base 0.19 ts_tree_cursor_current_node@Base 0.19 ts_tree_cursor_delete@Base 0.19 + ts_tree_cursor_goto_descendant@Base 0.20.9 ts_tree_cursor_goto_first_child@Base 0.19 ts_tree_cursor_goto_first_child_for_byte@Base 0.19 ts_tree_cursor_goto_first_child_for_point@Base 0.20 + ts_tree_cursor_goto_last_child@Base 0.20.9 ts_tree_cursor_goto_next_sibling@Base 0.19 ts_tree_cursor_goto_parent@Base 0.19 - ts_tree_cursor_new@Base 0.19 + ts_tree_cursor_goto_previous_sibling@Base 0.20.9 + ts_tree_cursor_new@Base 0.22.4 ts_tree_cursor_reset@Base 0.19 + ts_tree_cursor_reset_to@Base 0.20.9 ts_tree_delete@Base 0.19 ts_tree_edit@Base 0.19 ts_tree_get_changed_ranges@Base 0.19 @@ -104,99 +140,4 @@ libtree-sitter.so.0 libtree-sitter0 #MINVER# ts_tree_print_dot_graph@Base 0.20.8 ts_tree_root_node@Base 0.19 ts_tree_root_node_with_offset@Base 0.20.7 - (optional)ts_current_calloc@Base 0.20.2 - (optional)ts_current_free@Base 0.20.2 - (optional)ts_current_malloc@Base 0.20.2 - (optional)ts_current_realloc@Base 0.20.2 - (optional)ts_external_scanner_state_copy@Base 0.19 - (optional)ts_external_scanner_state_data@Base 0.19 - (optional)ts_external_scanner_state_delete@Base 0.19 - (optional)ts_external_scanner_state_eq@Base 0.19 - (optional)ts_external_scanner_state_init@Base 0.19 - (optional)ts_language_public_symbol@Base 0.19 - (optional)ts_language_symbol_metadata@Base 0.19 - (optional)ts_language_table_entry@Base 0.19 - (optional)ts_lexer_advance_to_end@Base 0.19 - (optional)ts_lexer_delete@Base 0.19 - (optional)ts_lexer_finish@Base 0.19 - (optional)ts_lexer_included_ranges@Base 0.19 - (optional)ts_lexer_init@Base 0.19 - (optional)ts_lexer_mark_end@Base 0.19 - (optional)ts_lexer_reset@Base 0.19 - (optional)ts_lexer_set_included_ranges@Base 0.19 - (optional)ts_lexer_set_input@Base 0.19 - (optional)ts_lexer_start@Base 0.19 - (optional)ts_node_new@Base 0.19 - (optional)ts_node_parent@Base 0.19 - (optional)ts_parser_reset@Base 0.19 - (optional)ts_query__step_is_fallible@Base 0.20.1 - (optional)ts_query_cursor__compare_captures@Base 0.19 - (optional)ts_query_cursor__compare_nodes@Base 0.19 - (optional)ts_range_array_get_changed_ranges@Base 0.19 - (optional)ts_range_array_intersects@Base 0.19 - (optional)ts_stack_can_merge@Base 0.19 - (optional)ts_subtree__print_dot_graph@Base 0.19 - (optional)ts_subtree_array_clear@Base 0.19 - (optional)ts_subtree_array_copy@Base 0.19 - (optional)ts_subtree_array_delete@Base 0.19 - (optional)ts_subtree_array_remove_trailing_extras@Base 0.19 - (optional)ts_subtree_array_reverse@Base 0.19 - (optional)ts_subtree_balance@Base 0.19 - 
(optional)ts_subtree_clone@Base 0.19 - (optional)ts_subtree_compare@Base 0.19 - (optional)ts_subtree_edit@Base 0.19 - (optional)ts_subtree_external_scanner_state@Base 0.20.7 - (optional)ts_subtree_external_scanner_state_eq@Base 0.19 - (optional)ts_subtree_get_changed_ranges@Base 0.19 - (optional)ts_subtree_last_external_token@Base 0.19 - (optional)ts_subtree_make_mut@Base 0.19 - (optional)ts_subtree_new_error@Base 0.19 - (optional)ts_subtree_new_error_node@Base 0.19 - (optional)ts_subtree_new_leaf@Base 0.19 - (optional)ts_subtree_new_missing_leaf@Base 0.19 - (optional)ts_subtree_new_node@Base 0.19 - (optional)ts_subtree_pool_delete@Base 0.19 - (optional)ts_subtree_pool_new@Base 0.19 - (optional)ts_subtree_print_dot_graph@Base 0.19 - (optional)ts_subtree_release@Base 0.19 - (optional)ts_subtree_retain@Base 0.19 - (optional)ts_subtree_set_symbol@Base 0.19 - (optional)ts_subtree_string@Base 0.19 - (optional)ts_subtree_summarize_children@Base 0.19 - (optional)ts_stack_clear@Base 0.19 - (optional)ts_stack_copy_version@Base 0.19 - (optional)ts_stack_delete@Base 0.19 - (optional)ts_stack_dynamic_precedence@Base 0.19 - (optional)ts_stack_error_cost@Base 0.19 - (optional)ts_stack_get_summary@Base 0.19 - (optional)ts_stack_halt@Base 0.19 - (optional)ts_stack_has_advanced_since_error@Base 0.19 - (optional)ts_stack_is_active@Base 0.19 - (optional)ts_stack_is_halted@Base 0.19 - (optional)ts_stack_is_paused@Base 0.19 - (optional)ts_stack_last_external_token@Base 0.19 - (optional)ts_stack_merge@Base 0.19 - (optional)ts_stack_new@Base 0.19 - (optional)ts_stack_node_count_since_error@Base 0.19 - (optional)ts_stack_pause@Base 0.19 - (optional)ts_stack_pop_all@Base 0.19 - (optional)ts_stack_pop_count@Base 0.19 - (optional)ts_stack_pop_error@Base 0.19 - (optional)ts_stack_pop_pending@Base 0.19 - (optional)ts_stack_position@Base 0.19 - (optional)ts_stack_print_dot_graph@Base 0.19 - (optional)ts_stack_push@Base 0.19 - (optional)ts_stack_record_summary@Base 0.19 - (optional)ts_stack_remove_version@Base 0.19 - (optional)ts_stack_renumber_version@Base 0.19 - (optional)ts_stack_resume@Base 0.19 - (optional)ts_stack_set_last_external_token@Base 0.19 - (optional)ts_stack_state@Base 0.19 - (optional)ts_stack_swap_versions@Base 0.19 - (optional)ts_stack_version_count@Base 0.19 - (optional)ts_tree_cursor_current_status@Base 0.19 - (optional)ts_tree_cursor_goto_first_child_internal@Base 0.20.8 - (optional)ts_tree_cursor_init@Base 0.19 - (optional)ts_tree_cursor_goto_next_sibling_internal@Base 0.20.8 - (optional)ts_tree_cursor_parent_node@Base 0.19 - (optional)ts_tree_new@Base 0.19 + ts_wasm_store_delete@Base 0.20.9 diff --git a/debian/patches/0001-Remove-wasm-feature.patch b/debian/patches/0001-Remove-wasm-feature.patch new file mode 100644 index 0000000..1e37a1c --- /dev/null +++ b/debian/patches/0001-Remove-wasm-feature.patch @@ -0,0 +1,234 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 17:12:56 -0700 +Subject: Remove wasm feature + +wasmtime-c-api-impl is not packaged, so avoid exposing the feature for now. 
+ +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 1 - + cli/Cargo.toml | 2 -- + cli/loader/Cargo.toml | 1 - + cli/src/lib.rs | 2 ++ + cli/src/main.rs | 46 +++++++++++++++++++++++++++++++--------------- + cli/src/wasm.rs | 1 + + lib/Cargo.toml | 7 ------- + 7 files changed, 34 insertions(+), 26 deletions(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 53caf23..4958503 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -82,7 +82,6 @@ tiny_http = "0.12.0" + toml = "0.8.12" + unindent = "0.2.3" + walkdir = "2.5.0" +-wasmparser = "0.206.0" + webbrowser = "1.0.0" + + tree-sitter = { version = "0.22.6", path = "./lib" } +diff --git a/cli/Cargo.toml b/cli/Cargo.toml +index fd2136a..f53c944 100644 +--- a/cli/Cargo.toml ++++ b/cli/Cargo.toml +@@ -22,7 +22,6 @@ name = "benchmark" + harness = false + + [features] +-wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] + + [dependencies] + ansi_term.workspace = true +@@ -51,7 +50,6 @@ serde_json.workspace = true + smallbitvec.workspace = true + tiny_http.workspace = true + walkdir.workspace = true +-wasmparser.workspace = true + webbrowser.workspace = true + + tree-sitter.workspace = true +diff --git a/cli/loader/Cargo.toml b/cli/loader/Cargo.toml +index bff2f63..4083ade 100644 +--- a/cli/loader/Cargo.toml ++++ b/cli/loader/Cargo.toml +@@ -13,7 +13,6 @@ keywords.workspace = true + categories.workspace = true + + [features] +-wasm = ["tree-sitter/wasm"] + + [dependencies] + anyhow.workspace = true +diff --git a/cli/src/lib.rs b/cli/src/lib.rs +index 549db77..d7a7e3a 100644 +--- a/cli/src/lib.rs ++++ b/cli/src/lib.rs +@@ -4,6 +4,7 @@ pub mod generate; + pub mod highlight; + pub mod logger; + pub mod parse; ++#[cfg(feature = "wasm")] + pub mod playground; + pub mod query; + pub mod query_testing; +@@ -12,6 +13,7 @@ pub mod test; + pub mod test_highlight; + pub mod test_tags; + pub mod util; ++#[cfg(feature = "wasm")] + pub mod wasm; + + #[cfg(test)] +diff --git a/cli/src/main.rs b/cli/src/main.rs +index 94df332..1831149 100644 +--- a/cli/src/main.rs ++++ b/cli/src/main.rs +@@ -14,10 +14,12 @@ use tree_sitter_cli::{ + generate::{self, lookup_package_json_for_path}, + highlight, logger, + parse::{self, ParseFileOptions, ParseOutput}, +- playground, query, tags, ++ query, tags, + test::{self, TestOptions}, +- test_highlight, test_tags, util, wasm, ++ test_highlight, test_tags, util, + }; ++#[cfg(feature = "wasm")] ++use tree_sitter_cli::{playground, wasm}; + use tree_sitter_config::Config; + use tree_sitter_highlight::Highlighter; + use tree_sitter_loader as loader; +@@ -33,12 +35,14 @@ enum Commands { + InitConfig(InitConfig), + Generate(Generate), + Build(Build), ++ #[cfg(feature = "wasm")] + BuildWasm(BuildWasm), + Parse(Parse), + Test(Test), + Query(Query), + Highlight(Highlight), + Tags(Tags), ++ #[cfg(feature = "wasm")] + Playground(Playground), + DumpLanguages(DumpLanguages), + } +@@ -121,6 +125,7 @@ struct Build { + pub internal_build: bool, + } + ++#[cfg(feature = "wasm")] + #[derive(Args)] + #[command(about = "Compile a parser to WASM", alias = "bw")] + struct BuildWasm { +@@ -159,6 +164,7 @@ struct Parse { + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, ++ #[cfg(feature = "wasm")] + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic libraries" +@@ -237,6 +243,7 @@ struct Test { + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, ++ #[cfg(feature = "wasm")] + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic 
libraries" +@@ -344,6 +351,7 @@ struct Tags { + pub config_path: Option, + } + ++#[cfg(feature = "wasm")] + #[derive(Args)] + #[command( + about = "Start local playground for a parser in the browser", +@@ -466,19 +474,25 @@ fn run() -> Result<()> { + + Commands::Build(build_options) => { + if build_options.wasm { +- let grammar_path = +- current_dir.join(build_options.path.as_deref().unwrap_or_default()); +- let output_path = build_options.output.map(|path| current_dir.join(path)); +- let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) +- .map(|(p, _)| p.parent().unwrap().to_path_buf())?; +- wasm::compile_language_to_wasm( +- &loader, +- Some(&root_path), +- &grammar_path, +- ¤t_dir, +- output_path, +- build_options.docker, +- )?; ++ if !cfg!(feature = "wasm") { ++ return Err(anyhow!("cli was not build with wasm feature enabled")); ++ } ++ #[cfg(feature = "wasm")] ++ { ++ let grammar_path = ++ current_dir.join(build_options.path.as_deref().unwrap_or_default()); ++ let output_path = build_options.output.map(|path| current_dir.join(path)); ++ let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) ++ .map(|(p, _)| p.parent().unwrap().to_path_buf())?; ++ wasm::compile_language_to_wasm( ++ &loader, ++ Some(&root_path), ++ &grammar_path, ++ ¤t_dir, ++ output_path, ++ build_options.docker, ++ )?; ++ } + } else { + let grammar_path = + current_dir.join(build_options.path.as_deref().unwrap_or_default()); +@@ -521,6 +535,7 @@ fn run() -> Result<()> { + } + } + ++ #[cfg(feature = "wasm")] + Commands::BuildWasm(wasm_options) => { + eprintln!("`build-wasm` is deprecated and will be removed in v0.24.0. You should use `build --wasm` instead"); + let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default()); +@@ -905,6 +920,7 @@ fn run() -> Result<()> { + )?; + } + ++ #[cfg(feature = "wasm")] + Commands::Playground(playground_options) => { + let open_in_browser = !playground_options.quiet; + let grammar_path = playground_options +diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs +index 7782a33..dfe60f5 100644 +--- a/cli/src/wasm.rs ++++ b/cli/src/wasm.rs +@@ -1,3 +1,4 @@ ++#![cfg(feature = "wasm")] + use std::{ + fs, + path::{Path, PathBuf}, +diff --git a/lib/Cargo.toml b/lib/Cargo.toml +index 4d9e8b9..f9d834f 100644 +--- a/lib/Cargo.toml ++++ b/lib/Cargo.toml +@@ -26,17 +26,10 @@ include = [ + ] + + [features] +-wasm = ["wasmtime-c-api"] + + [dependencies] + regex.workspace = true + +-[dependencies.wasmtime-c-api] +-version = "19" +-optional = true +-package = "wasmtime-c-api-impl" +-default-features = false +- + [build-dependencies] + bindgen = { version = "0.69.4", optional = true } + cc.workspace = true diff --git a/debian/patches/0002-Relax-clap-dependency-to-4.4.18.patch b/debian/patches/0002-Relax-clap-dependency-to-4.4.18.patch new file mode 100644 index 0000000..97b427f --- /dev/null +++ b/debian/patches/0002-Relax-clap-dependency-to-4.4.18.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:29:25 -0700 +Subject: Relax clap dependency to 4.4.18 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 4958503..eed8319 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -42,7 +42,7 @@ ansi_term = "0.12.1" + anstyle = "1.0.6" + anyhow = "1.0.82" + cc = "1.0.95" +-clap = { version = "4.5.4", features = [ ++clap = { version = "4.4.18", features = [ + "cargo", + "derive", + "env", diff --git 
a/debian/patches/0003-Relax-ctrlc-dependency-to-3.4.2.patch b/debian/patches/0003-Relax-ctrlc-dependency-to-3.4.2.patch new file mode 100644 index 0000000..e7952c6 --- /dev/null +++ b/debian/patches/0003-Relax-ctrlc-dependency-to-3.4.2.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:29:55 -0700 +Subject: Relax ctrlc dependency to 3.4.2 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index eed8319..3b1415b 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -50,7 +50,7 @@ clap = { version = "4.4.18", features = [ + "unstable-styles", + ] } + ctor = "0.2.8" +-ctrlc = { version = "3.4.4", features = ["termination"] } ++ctrlc = { version = "3.4.2", features = ["termination"] } + difference = "2.0.0" + dirs = "5.0.1" + filetime = "0.2.23" diff --git a/debian/patches/0004-Relax-html-escape-dependency-to-0.2.12.patch b/debian/patches/0004-Relax-html-escape-dependency-to-0.2.12.patch new file mode 100644 index 0000000..7f1cadc --- /dev/null +++ b/debian/patches/0004-Relax-html-escape-dependency-to-0.2.12.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:30:13 -0700 +Subject: Relax html-escape dependency to 0.2.12 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 3b1415b..81097b5 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -58,7 +58,7 @@ fs4 = "0.8.2" + git2 = "0.18.3" + glob = "0.3.1" + heck = "0.5.0" +-html-escape = "0.2.13" ++html-escape = "0.2.12" + indexmap = "2.2.6" + indoc = "2.0.5" + lazy_static = "1.4.0" diff --git a/debian/patches/0005-Relax-memchr-dependency-to-2.7.1.patch b/debian/patches/0005-Relax-memchr-dependency-to-2.7.1.patch new file mode 100644 index 0000000..2eb4725 --- /dev/null +++ b/debian/patches/0005-Relax-memchr-dependency-to-2.7.1.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:30:33 -0700 +Subject: Relax memchr dependency to 2.7.1 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 81097b5..903b42f 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -64,7 +64,7 @@ indoc = "2.0.5" + lazy_static = "1.4.0" + libloading = "0.8.3" + log = { version = "0.4.21", features = ["std"] } +-memchr = "2.7.2" ++memchr = "2.7.1" + once_cell = "1.19.0" + pretty_assertions = "1.4.0" + rand = "0.8.5" diff --git a/debian/patches/0006-Relax-regex-syntax-dependency-to-0.8.2.patch b/debian/patches/0006-Relax-regex-syntax-dependency-to-0.8.2.patch new file mode 100644 index 0000000..2366931 --- /dev/null +++ b/debian/patches/0006-Relax-regex-syntax-dependency-to-0.8.2.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:30:51 -0700 +Subject: Relax regex-syntax dependency to 0.8.2 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 903b42f..3642efc 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -69,7 +69,7 @@ once_cell = "1.19.0" + pretty_assertions = "1.4.0" + rand = "0.8.5" + regex = "1.10.4" +-regex-syntax = "0.8.3" ++regex-syntax = "0.8.2" + rustc-hash = "1.1.0" + semver = "1.0.22" + serde = { version = "1.0.198", features = ["derive"] } diff --git a/debian/patches/0007-Relax-semver-dependency-to-1.0.21.patch 
b/debian/patches/0007-Relax-semver-dependency-to-1.0.21.patch new file mode 100644 index 0000000..fc298da --- /dev/null +++ b/debian/patches/0007-Relax-semver-dependency-to-1.0.21.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:31:10 -0700 +Subject: Relax semver dependency to 1.0.21 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 3642efc..0da084d 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -71,7 +71,7 @@ rand = "0.8.5" + regex = "1.10.4" + regex-syntax = "0.8.2" + rustc-hash = "1.1.0" +-semver = "1.0.22" ++semver = "1.0.21" + serde = { version = "1.0.198", features = ["derive"] } + serde_derive = "1.0.197" + serde_json = { version = "1.0.116", features = ["preserve_order"] } diff --git a/debian/patches/0008-Relax-smallbitvec-dependency-to-2.5.1.patch b/debian/patches/0008-Relax-smallbitvec-dependency-to-2.5.1.patch new file mode 100644 index 0000000..746537e --- /dev/null +++ b/debian/patches/0008-Relax-smallbitvec-dependency-to-2.5.1.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:31:29 -0700 +Subject: Relax smallbitvec dependency to 2.5.1 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 0da084d..0d01c3a 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -75,7 +75,7 @@ semver = "1.0.21" + serde = { version = "1.0.198", features = ["derive"] } + serde_derive = "1.0.197" + serde_json = { version = "1.0.116", features = ["preserve_order"] } +-smallbitvec = "2.5.3" ++smallbitvec = "2.5.1" + tempfile = "3.10.1" + thiserror = "1.0.59" + tiny_http = "0.12.0" diff --git a/debian/patches/0009-Relax-webbrowser-dependency-to-allow-0.8.patch b/debian/patches/0009-Relax-webbrowser-dependency-to-allow-0.8.patch new file mode 100644 index 0000000..896651c --- /dev/null +++ b/debian/patches/0009-Relax-webbrowser-dependency-to-allow-0.8.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 10 Aug 2024 23:32:30 -0700 +Subject: Relax webbrowser dependency to allow 0.8 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 0d01c3a..2abf046 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -82,7 +82,7 @@ tiny_http = "0.12.0" + toml = "0.8.12" + unindent = "0.2.3" + walkdir = "2.5.0" +-webbrowser = "1.0.0" ++webbrowser = ">= 0.8, < 2" + + tree-sitter = { version = "0.22.6", path = "./lib" } + tree-sitter-loader = { version = "0.22.6", path = "./cli/loader" } diff --git a/debian/patches/0010-Relax-heck-dependency-to-0.4.0.patch b/debian/patches/0010-Relax-heck-dependency-to-0.4.0.patch new file mode 100644 index 0000000..91a1be3 --- /dev/null +++ b/debian/patches/0010-Relax-heck-dependency-to-0.4.0.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sun, 11 Aug 2024 00:26:05 -0700 +Subject: Relax heck dependency to 0.4.0 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 2abf046..780abee 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -57,7 +57,7 @@ filetime = "0.2.23" + fs4 = "0.8.2" + git2 = "0.18.3" + glob = "0.3.1" +-heck = "0.5.0" ++heck = "0.4.0" + html-escape = "0.2.12" + indexmap = "2.2.6" + indoc = "2.0.5" diff --git a/debian/patches/0011-Relax-anstyle-dependency-to-1.0.4.patch 
b/debian/patches/0011-Relax-anstyle-dependency-to-1.0.4.patch new file mode 100644 index 0000000..d802889 --- /dev/null +++ b/debian/patches/0011-Relax-anstyle-dependency-to-1.0.4.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sun, 11 Aug 2024 00:28:02 -0700 +Subject: Relax anstyle dependency to 1.0.4 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 780abee..0ecb634 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -39,7 +39,7 @@ strip = false + + [workspace.dependencies] + ansi_term = "0.12.1" +-anstyle = "1.0.6" ++anstyle = "1.0.4" + anyhow = "1.0.82" + cc = "1.0.95" + clap = { version = "4.4.18", features = [ diff --git a/debian/patches/0012-Report-deb-built-using-tree-sitter-0-when-building-t.patch b/debian/patches/0012-Report-deb-built-using-tree-sitter-0-when-building-t.patch new file mode 100644 index 0000000..894b1e0 --- /dev/null +++ b/debian/patches/0012-Report-deb-built-using-tree-sitter-0-when-building-t.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Tue, 24 Sep 2024 21:15:04 -0400 +Subject: Report deb-built-using=tree-sitter=0 when building tree-sitter crate + +The license is MIT, so there is no requirement to distribute source along with binaries. + +Signed-off-by: James McCoy +--- + lib/binding_rust/build.rs | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs +index 19cb0f6..c47d65e 100644 +--- a/lib/binding_rust/build.rs ++++ b/lib/binding_rust/build.rs +@@ -47,6 +47,7 @@ fn main() { + .file(src_path.join("lib.c")) + .compile("tree-sitter"); + ++ println!("dh-cargo:deb-built-using=tree-sitter=0={}", env!("CARGO_MANIFEST_DIR")); + println!("cargo:include={}", include_path.display()); + } + diff --git a/debian/patches/0013-Relax-ctor-dependency-to-0.1-0.3.patch b/debian/patches/0013-Relax-ctor-dependency-to-0.1-0.3.patch new file mode 100644 index 0000000..4bf20b4 --- /dev/null +++ b/debian/patches/0013-Relax-ctor-dependency-to-0.1-0.3.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Tue, 5 Nov 2024 08:51:15 -0500 +Subject: Relax ctor dependency to >= 0.1, < 0.3 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 0ecb634..91dc061 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -49,7 +49,7 @@ clap = { version = "4.4.18", features = [ + "help", + "unstable-styles", + ] } +-ctor = "0.2.8" ++ctor = ">= 0.1, < 0.3" + ctrlc = { version = "3.4.2", features = ["termination"] } + difference = "2.0.0" + dirs = "5.0.1" diff --git a/debian/patches/0014-Relax-fs4-dependency-to-0.9-0.12.patch b/debian/patches/0014-Relax-fs4-dependency-to-0.9-0.12.patch new file mode 100644 index 0000000..b9b00ac --- /dev/null +++ b/debian/patches/0014-Relax-fs4-dependency-to-0.9-0.12.patch @@ -0,0 +1,40 @@ +From: James McCoy +Date: Tue, 12 Nov 2024 22:16:45 -0500 +Subject: Relax fs4 dependency to >= 0.9, < 0.12 + +fs4::FileExt moved to fs4::fs_std::FileExt in 0.9, so take upstream's +patch for that. 
+ +Forwarded: not-needed +Signed-off-by: James McCoy +--- + Cargo.toml | 2 +- + cli/loader/src/lib.rs | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/Cargo.toml b/Cargo.toml +index 91dc061..8b57aa5 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -54,7 +54,7 @@ ctrlc = { version = "3.4.2", features = ["termination"] } + difference = "2.0.0" + dirs = "5.0.1" + filetime = "0.2.23" +-fs4 = "0.8.2" ++fs4 = ">= 0.9, < 0.12" + git2 = "0.18.3" + glob = "0.3.1" + heck = "0.4.0" +diff --git a/cli/loader/src/lib.rs b/cli/loader/src/lib.rs +index 3371e1f..9b67416 100644 +--- a/cli/loader/src/lib.rs ++++ b/cli/loader/src/lib.rs +@@ -15,7 +15,7 @@ use std::{ + }; + + use anyhow::{anyhow, Context, Error, Result}; +-use fs4::FileExt; ++use fs4::fs_std::FileExt; + use indoc::indoc; + use libloading::{Library, Symbol}; + use once_cell::unsync::OnceCell; diff --git a/debian/patches/0015-lib-Bump-bindgen-dependency-to-0.70.patch b/debian/patches/0015-lib-Bump-bindgen-dependency-to-0.70.patch new file mode 100644 index 0000000..f4b41f5 --- /dev/null +++ b/debian/patches/0015-lib-Bump-bindgen-dependency-to-0.70.patch @@ -0,0 +1,23 @@ +From: James McCoy +Date: Sat, 30 Nov 2024 20:28:59 -0500 +Subject: lib: Bump bindgen dependency to 0.70 + +Forwarded: not-needed +Signed-off-by: James McCoy +--- + lib/Cargo.toml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/Cargo.toml b/lib/Cargo.toml +index f9d834f..3bfb868 100644 +--- a/lib/Cargo.toml ++++ b/lib/Cargo.toml +@@ -31,7 +31,7 @@ include = [ + regex.workspace = true + + [build-dependencies] +-bindgen = { version = "0.69.4", optional = true } ++bindgen = { version = "0.70", optional = true } + cc.workspace = true + + [lib] diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..712baa2 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1,15 @@ +0001-Remove-wasm-feature.patch +0002-Relax-clap-dependency-to-4.4.18.patch +0003-Relax-ctrlc-dependency-to-3.4.2.patch +0004-Relax-html-escape-dependency-to-0.2.12.patch +0005-Relax-memchr-dependency-to-2.7.1.patch +0006-Relax-regex-syntax-dependency-to-0.8.2.patch +0007-Relax-semver-dependency-to-1.0.21.patch +0008-Relax-smallbitvec-dependency-to-2.5.1.patch +0009-Relax-webbrowser-dependency-to-allow-0.8.patch +0010-Relax-heck-dependency-to-0.4.0.patch +0011-Relax-anstyle-dependency-to-1.0.4.patch +0012-Report-deb-built-using-tree-sitter-0-when-building-t.patch +0013-Relax-ctor-dependency-to-0.1-0.3.patch +0014-Relax-fs4-dependency-to-0.9-0.12.patch +0015-lib-Bump-bindgen-dependency-to-0.70.patch diff --git a/debian/rules b/debian/rules index 573ec2c..3f7ee69 100755 --- a/debian/rules +++ b/debian/rules @@ -9,5 +9,15 @@ CFLAGS += $(CPPFLAGS) %: dh $@ +override_dh_auto_build: + dh_auto_build -- PREFIX=/usr LIBDIR=/usr/lib/$(DEB_HOST_MULTIARCH) + dh_auto_build --buildsystem=rust + +# tree-sitter-cli's tests require downloading fixtures at build time, so we can't run those. 
+# xtask is an internal crate used for release automation +override_dh_auto_test: + dh_auto_test --buildsystem=rust -- --tests --workspace --exclude tree-sitter-cli --exclude xtask + override_dh_auto_install: dh_auto_install -- PREFIX=/usr LIBDIR=/usr/lib/$(DEB_HOST_MULTIARCH) + dh_auto_install --buildsystem=rust diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..21e9e6c --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,150 @@ +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.22.6 --all-targets --all-features +Features: + test-name=librust-tree-sitter-dev:@, +Depends: + dh-cargo (>= 31), + librust-bindgen-0.70+default-dev, + librust-tree-sitter-dev, + rustc (>= 1.65), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.22.6 --all-targets +Features: + test-name=librust-tree-sitter-dev:default, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-dev, + rustc (>= 1.65), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter 0.22.6 --all-targets --no-default-features +Features: + test-name=librust-tree-sitter-dev:, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-dev, + rustc (>= 1.65), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-config 0.22.6 --all-targets --all-features +Features: + test-name=librust-tree-sitter-config-dev:@, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-config-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-config 0.22.6 --all-targets +Features: + test-name=librust-tree-sitter-config-dev:default, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-config-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-config 0.22.6 --all-targets --no-default-features +Features: + test-name=librust-tree-sitter-config-dev:, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-config-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-highlight 0.22.6 --all-targets --all-features +Features: + test-name=librust-tree-sitter-highlight-dev:@, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-highlight-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-highlight 0.22.6 --all-targets +Features: + test-name=librust-tree-sitter-highlight-dev:default, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-highlight-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-highlight 0.22.6 --all-targets --no-default-features +Features: + test-name=librust-tree-sitter-highlight-dev:, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-highlight-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-loader 0.22.6 --all-targets --all-features +Features: + test-name=librust-tree-sitter-loader-dev:@, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-loader-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-loader 0.22.6 --all-targets +Features: + test-name=librust-tree-sitter-loader-dev:default, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-loader-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + 
+Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-loader 0.22.6 --all-targets --no-default-features +Features: + test-name=librust-tree-sitter-loader-dev:, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-loader-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-tags 0.22.6 --all-targets --all-features +Features: + test-name=librust-tree-sitter-tags-dev:@, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-tags-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-tags 0.22.6 --all-targets +Features: + test-name=librust-tree-sitter-tags-dev:default, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-tags-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, + +Test-Command: /usr/share/cargo/bin/cargo-auto-test tree-sitter-tags 0.22.6 --all-targets --no-default-features +Features: + test-name=librust-tree-sitter-tags-dev:, +Depends: + dh-cargo (>= 31), + librust-tree-sitter-tags-dev, + rustc (>= 1.74.1), +Restrictions: + allow-stderr, diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 3b2801b..47aca53 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,22 +1,32 @@ GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4.3) + activesupport (7.1.3) + base64 + bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) + connection_pool (>= 2.2.5) + drb i18n (>= 1.6, < 2) minitest (>= 5.1) + mutex_m tzinfo (~> 2.0) addressable (2.8.1) public_suffix (>= 2.0.2, < 6.0) + base64 (0.2.0) + bigdecimal (3.1.6) coffee-script (2.4.1) coffee-script-source execjs coffee-script-source (1.11.1) colorator (1.1.0) - commonmarker (0.23.8) - concurrent-ruby (1.2.2) + commonmarker (0.23.10) + concurrent-ruby (1.2.3) + connection_pool (2.4.1) dnsruby (1.61.9) simpleidn (~> 0.1) + drb (2.2.0) + ruby2_keywords em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) @@ -86,7 +96,7 @@ GEM activesupport (>= 2) nokogiri (>= 1.4) http_parser.rb (0.8.0) - i18n (1.12.0) + i18n (1.14.1) concurrent-ruby (~> 1.0) jekyll (3.9.3) addressable (~> 2.4) @@ -209,8 +219,9 @@ GEM jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) - minitest (5.18.0) - nokogiri (1.14.2-x86_64-linux) + minitest (5.21.2) + mutex_m (0.2.0) + nokogiri (1.16.2-x86_64-linux) racc (~> 1.4) octokit (4.25.1) faraday (>= 1, < 3) @@ -218,7 +229,7 @@ GEM pathutil (0.16.2) forwardable-extended (~> 2.6) public_suffix (4.0.7) - racc (1.6.2) + racc (1.7.3) rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) @@ -250,7 +261,7 @@ GEM webrick (1.8.1) PLATFORMS - ruby + x86_64-linux DEPENDENCIES github-pages diff --git a/docs/assets/css/style.scss b/docs/assets/css/style.scss index 2b7a018..b838211 100644 --- a/docs/assets/css/style.scss +++ b/docs/assets/css/style.scss @@ -33,6 +33,16 @@ a[href^="http"]:after { padding: $padding 0; } +#main-content code:not(pre code, a code) { + color: #c7254e; + font-size: 0.9em; + background-color: #f8f8f8; + border: 1px solid #eaeaea; + border-radius: 3px; + margin: 0 2px; + padding: 0 5px; +} + #sidebar { position: fixed; background: white; @@ -162,6 +172,10 @@ a > span { .CodeMirror div.CodeMirror-cursor { border-left: 3px solid red; } + + h4#about { + margin: 10ex 0 0 0; + } } #output-container { diff --git a/docs/assets/js/playground.js b/docs/assets/js/playground.js index c14bf0f..5864d97 100644 --- a/docs/assets/js/playground.js +++ b/docs/assets/js/playground.js @@ -1,7 +1,7 @@ let tree; (async () => { - const CAPTURE_REGEX = 
/@\s*([\w\._-]+)/g; + const CAPTURE_REGEX = /@\s*([\w._-]+)/g; const COLORS_BY_INDEX = [ 'blue', 'chocolate', @@ -18,8 +18,6 @@ let tree; 'sienna', ]; - const scriptURL = document.currentScript.getAttribute('src'); - const codeInput = document.getElementById('code-input'); const languageSelect = document.getElementById('language-select'); const loggingCheckbox = document.getElementById('logging-checkbox'); @@ -102,8 +100,8 @@ let tree; handleQueryChange(); } - async function handleCodeChange(editor, changes) { - const newText = codeEditor.getValue() + '\n'; + async function handleCodeChange(_editor, changes) { + const newText = `${codeEditor.getValue()}\n`; const edits = tree && changes && changes.map(treeEditForEditorChange); const start = performance.now(); @@ -128,9 +126,9 @@ let tree; isRendering++; const cursor = tree.walk(); - let currentRenderCount = parseCount; + const currentRenderCount = parseCount; let row = ''; - let rows = []; + const rows = []; let finishedRow = false; let visitedChildren = false; let indentLevel = 0; @@ -175,7 +173,7 @@ let tree; const start = cursor.startPosition; const end = cursor.endPosition; const id = cursor.nodeId; - let fieldName = cursor.currentFieldName(); + let fieldName = cursor.currentFieldName; if (fieldName) { fieldName += ': '; } else { @@ -319,7 +317,7 @@ let tree; start.column > end.column ) ) { - let swap = end; + const swap = end; end = start; start = swap; } @@ -445,14 +443,14 @@ let tree; } function debounce(func, wait, immediate) { - var timeout; + let timeout; return function() { - var context = this, args = arguments; - var later = function() { + const context = this, args = arguments; + const later = function() { timeout = null; if (!immediate) func.apply(context, args); }; - var callNow = immediate && !timeout; + const callNow = immediate && !timeout; clearTimeout(timeout); timeout = setTimeout(later, wait); if (callNow) func.apply(context, args); diff --git a/docs/index.md b/docs/index.md index 2e3b59e..8949aa0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,35 +15,43 @@ Tree-sitter is a parser generator tool and an incremental parsing library. 
It ca There are currently bindings that allow Tree-sitter to be used from the following languages: +* [C#](https://github.com/tree-sitter/csharp-tree-sitter) * [Go](https://github.com/smacker/go-tree-sitter) +* [Guile](https://github.com/Z572/guile-ts) * [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) +* [Java](https://github.com/serenadeai/java-tree-sitter) +* [Java](https://github.com/bonede/tree-sitter-ng) +* [Java (Android)](https://github.com/AndroidIDEOfficial/android-tree-sitter) * [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) * [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +* [Kotlin](https://github.com/oxisto/kotlintree) * [Lua](https://github.com/euclidianAce/ltreesitter) * [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core) +* [Odin](https://github.com/laytan/odin-tree-sitter) * [Perl](https://metacpan.org/pod/Text::Treesitter) * [Python](https://github.com/tree-sitter/py-tree-sitter) -* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) +* [Ruby](https://github.com/Faveod/ruby-tree-sitter) * [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * [Swift](https://github.com/ChimeHQ/SwiftTreeSitter) -* [Kotlin](https://github.com/oxisto/kotlintree) -* [Java](https://github.com/serenadeai/java-tree-sitter) ### Parsers * [Ada](https://github.com/briot/tree-sitter-ada) * [Agda](https://github.com/tree-sitter/tree-sitter-agda) * [Apex](https://github.com/aheber/tree-sitter-sfapex) +* [ApexCode](https://github.com/jsuarez-chipiron/tree-sitter-apex) +* [AWS Event Rule](https://github.com/3p3r/tree-sitter-eventrule) * [Bash](https://github.com/tree-sitter/tree-sitter-bash) * [Beancount](https://github.com/zwpaper/tree-sitter-beancount) * [Cap'n Proto](https://github.com/amaanq/tree-sitter-capnp) * [C](https://github.com/tree-sitter/tree-sitter-c) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) * [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) +* [CEL](https://github.com/bufbuild/tree-sitter-cel) * [Clojure](https://github.com/sogaiu/tree-sitter-clojure) * [CMake](https://github.com/uyha/tree-sitter-cmake) -* [Comment](https://github.com/stsewd/tree-sitter-comment) +* [COBOL](https://github.com/yutaro-sakamoto/tree-sitter-cobol) * [Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) * [CUDA](https://github.com/theHamsta/tree-sitter-cuda) @@ -68,45 +76,56 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Go](https://github.com/tree-sitter/tree-sitter-go) * [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) * [Go work](https://github.com/omertuc/tree-sitter-go-work) -* [Graphql](https://github.com/bkegley/tree-sitter-graphql) +* [GraphQL](https://github.com/bkegley/tree-sitter-graphql) * [Hack](https://github.com/slackhq/tree-sitter-hack) * [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl) * [HTML](https://github.com/tree-sitter/tree-sitter-html) +* [ISPC](https://github.com/fab4100/tree-sitter-ispc) * [Java](https://github.com/tree-sitter/tree-sitter-java) * [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) * [jq](https://github.com/flurie/tree-sitter-jq) -* [JSON5](https://github.com/Joakker/tree-sitter-json5) * [JSON](https://github.com/tree-sitter/tree-sitter-json) +* 
[JSON5](https://github.com/Joakker/tree-sitter-json5) * [Julia](https://github.com/tree-sitter/tree-sitter-julia) +* [Just](https://github.com/IndianBoy42/tree-sitter-just) * [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) * [LALRPOP](https://github.com/traxys/tree-sitter-lalrpop) -* [Latex](https://github.com/latex-lsp/tree-sitter-latex) +* [LaTeX](https://github.com/latex-lsp/tree-sitter-latex) * [Lean](https://github.com/Julian/tree-sitter-lean) * [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm) * [LLVM MachineIR](https://github.com/Flakebi/tree-sitter-llvm-mir) +* [LLVM MLIR](https://github.com/artagnon/tree-sitter-mlir) * [LLVM TableGen](https://github.com/Flakebi/tree-sitter-tablegen) -* [Lua](https://github.com/Azganoth/tree-sitter-lua) +* [Lua](https://github.com/MunifTanjim/tree-sitter-lua) +* [Magik](https://github.com/krn-robin/tree-sitter-magik) * [Make](https://github.com/alemuller/tree-sitter-make) * [Markdown](https://github.com/ikatyang/tree-sitter-markdown) * [Markdown](https://github.com/MDeiml/tree-sitter-markdown) * [Meson](https://github.com/Decodetalkers/tree-sitter-meson) * [Meson](https://github.com/staysail/tree-sitter-meson) -* [Motorola 68000 Assembly](https://github.com/grahambates/tree-sitter-m68k) +* [Motorola 68000 assembly](https://github.com/grahambates/tree-sitter-m68k) +* [NGINX](https://gitlab.com/joncoole/tree-sitter-nginx) +* [Nim](https://github.com/alaviss/tree-sitter-nim) * [Nix](https://github.com/cstrahan/tree-sitter-nix) +* [Noir](https://github.com/hhamud/tree-sitter-noir) * [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) +* [Odin](https://github.com/amaanq/tree-sitter-odin) +* [Ohm](https://github.com/novusnota/tree-sitter-ohm) * [Org](https://github.com/milisims/tree-sitter-org) +* [P4](https://github.com/ace-design/tree-sitter-p4) * [Pascal](https://github.com/Isopod/tree-sitter-pascal) * [Perl](https://github.com/ganezdragon/tree-sitter-perl) * [Perl](https://github.com/tree-sitter-perl/tree-sitter-perl) * [Perl POD](https://github.com/tree-sitter-perl/tree-sitter-pod) * [PHP](https://github.com/tree-sitter/tree-sitter-php) * [Portable Game Notation](https://github.com/rolandwalker/tree-sitter-pgn) -* [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell) +* [PowerShell](https://github.com/airbus-cert/tree-sitter-powershell) * [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Python](https://github.com/tree-sitter/tree-sitter-python) * [QML](https://github.com/yuja/tree-sitter-qmljs) +* [QuakeC](https://github.com/vkazanov/tree-sitter-quakec) * [Racket](https://github.com/6cdh/tree-sitter-racket) * [Rasi](https://github.com/Fymyte/tree-sitter-rasi) * [re2c](https://github.com/alemuller/tree-sitter-re2c) @@ -114,36 +133,46 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Rego](https://github.com/FallenAngel97/tree-sitter-rego) * [reStructuredText](https://github.com/stsewd/tree-sitter-rst) * [R](https://github.com/r-lib/tree-sitter-r) +* [Robot](https://github.com/Hubro/tree-sitter-robot) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) * [Scheme](https://github.com/6cdh/tree-sitter-scheme) -* [Scss](https://github.com/serenadeai/tree-sitter-scss) +* [SCSS](https://github.com/serenadeai/tree-sitter-scss) * 
[S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) * [Smali](https://github.com/amaanq/tree-sitter-smali) * [Smali](https://git.sr.ht/~yotam/tree-sitter-smali) -* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) +* [SourcePawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) * [SQL - BigQuery](https://github.com/takegue/tree-sitter-sql-bigquery) +* [SQL - General](https://github.com/DerekStride/tree-sitter-sql) * [SQL - PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) * [SQL - SQLite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) * [SSH](https://github.com/metio/tree-sitter-ssh-client-config) +* [Supercollider](https://github.com/madskjeldgaard/tree-sitter-supercollider) * [Svelte](https://github.com/Himujjal/tree-sitter-svelte) * [Swift](https://github.com/alex-pinkus/tree-sitter-swift) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) +* [Tact](https://github.com/tact-lang/tree-sitter-tact) * [Thrift](https://github.com/duskmoon314/tree-sitter-thrift) +* ["TODO:" comments](https://github.com/stsewd/tree-sitter-comment) * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [Tree-sitter Query](https://github.com/nvim-treesitter/tree-sitter-query) * [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle) +* [Twig](https://github.com/kaermorchen/tree-sitter-twig) * [Twig](https://github.com/gbprod/tree-sitter-twig) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) +* [Ungrammar](https://github.com/Philipp-M/tree-sitter-ungrammar) +* [USD](https://github.com/ColinKennedy/tree-sitter-usd) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) * [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) -* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) -* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl) +* [Wasm](https://github.com/wasm-lsp/tree-sitter-wasm) +* [WDL](https://github.com/jdidion/tree-sitter-wdl) +* [WGSL (WebGPU Shading Language)](https://github.com/mehmetoguzderin/tree-sitter-wgsl) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [YANG](https://github.com/Hubro/tree-sitter-yang) +* [Yuck](https://github.com/Philipp-M/tree-sitter-yuck) * [Zig](https://github.com/maxxnino/tree-sitter-zig) ### Talks on Tree-sitter @@ -156,9 +185,9 @@ There are currently bindings that allow Tree-sitter to be used from the followin The design of Tree-sitter was greatly influenced by the following research papers: -- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) -- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) -- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) -- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) -- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) -- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) +* [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) +* [Context Aware Scanning for Parsing Extensible 
Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) +* [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) +* [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) +* [Error Detection and Recovery in LR Parsers](https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) +* [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) diff --git a/docs/section-2-using-parsers.md b/docs/section-2-using-parsers.md index ea32f4f..266ed2a 100644 --- a/docs/section-2-using-parsers.md +++ b/docs/section-2-using-parsers.md @@ -51,7 +51,7 @@ Here's an example of a simple C program that uses the Tree-sitter [JSON parser]( // Declare the `tree_sitter_json` function, which is // implemented by the `tree-sitter-json` library. -TSLanguage *tree_sitter_json(); +const TSLanguage *tree_sitter_json(void); int main() { // Create a parser. @@ -137,7 +137,7 @@ TSTree *ts_parser_parse( ); ``` -The `TSInput` structure lets you to provide your own function for reading a chunk of text at a given byte offset and row/column position. The function can return text encoded in either UTF8 or UTF16. This interface allows you to efficiently parse text that is stored in your own data structure. +The `TSInput` structure lets you provide your own function for reading a chunk of text at a given byte offset and row/column position. The function can return text encoded in either UTF8 or UTF16. This interface allows you to efficiently parse text that is stored in your own data structure. ```c typedef struct { @@ -290,7 +290,7 @@ This `ts_node_edit` function is _only_ needed in the case where you have retriev ### Multi-language Documents -Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. +Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](https://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. @@ -326,13 +326,13 @@ Conceptually, it can be represented by three syntax trees with overlapping range #include // These functions are each implemented in their own repo. -const TSLanguage *tree_sitter_embedded_template(); -const TSLanguage *tree_sitter_html(); -const TSLanguage *tree_sitter_ruby(); +const TSLanguage *tree_sitter_embedded_template(void); +const TSLanguage *tree_sitter_html(void); +const TSLanguage *tree_sitter_ruby(void); int main(int argc, const char **argv) { const char *text = argv[1]; - unsigned len = strlen(src); + unsigned len = strlen(text); // Parse the entire text as ERB. TSParser *parser = ts_parser_new(); @@ -410,6 +410,12 @@ Internally, copying a syntax tree just entails incrementing an atomic reference You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. 
A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. +Note that the given input node is considered the root of the cursor, and the +cursor cannot walk outside this node, so going to the parent or any sibling +of the root node will return `false`. This has no unexpected effects if the given +input node is the actual `root` node of the tree, but is something to keep in mind +when using nodes that are not the `root` node. + You can initialize a cursor from any node: ```c @@ -442,13 +448,13 @@ Many code analysis tasks involve searching for patterns in syntax trees. Tree-si A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: -``` scheme +```scheme (binary_expression (number_literal) (number_literal)) ``` Children can also be omitted. For example, this would match any `binary_expression` where at least _one_ of its children is a `string_literal` node: -``` scheme +```scheme (binary_expression (string_literal)) ``` @@ -456,7 +462,7 @@ Children can also be omitted. For example, this would match any `binary_expressi In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`. -``` scheme +```scheme (assignment_expression left: (member_expression object: (call_expression))) @@ -464,9 +470,9 @@ #### Negated Fields -You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: +You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: -``` scheme +```scheme (class_declaration name: (identifier) @class_name !type_parameters) @@ -476,7 +482,7 @@ You can also constrain a pattern so that it only matches nodes that *lack* a cer The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes.
For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`: -``` scheme +```scheme (binary_expression operator: "!=" right: (null)) @@ -488,7 +494,7 @@ When matching patterns, you may want to process specific nodes within the patter For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: -``` scheme +```scheme (assignment_expression left: (identifier) @the-function-name right: (function)) @@ -496,7 +502,7 @@ For example, this pattern would match any assignment of a `function` to an `iden And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name: -``` scheme +```scheme (class_declaration name: (identifier) @the-class-name body: (class_body @@ -510,13 +516,13 @@ You can match a repeating sequence of sibling nodes using the postfix `+` and `* For example, this pattern would match a sequence of one or more comments: -``` scheme +```scheme (comment)+ ``` This pattern would match a class declaration, capturing all of the decorators if any were present: -``` scheme +```scheme (class_declaration (decorator)* @the-decorator name: (identifier) @the-name) @@ -524,7 +530,7 @@ This pattern would match a class declaration, capturing all of the decorators if You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: -``` scheme +```scheme (call_expression function: (identifier) @the-function arguments: (arguments (string)? @the-string-arg)) @@ -534,7 +540,7 @@ You can also mark a node as optional using the `?` operator. For example, this p You can also use parentheses for grouping a sequence of _sibling_ nodes. For example, this pattern would match a comment followed by a function declaration: -``` scheme +```scheme ( (comment) (function_declaration) @@ -543,7 +549,7 @@ You can also use parentheses for grouping a sequence of _sibling_ nodes. For exa Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers: -``` scheme +```scheme ( (number) ("," (number))* @@ -558,7 +564,7 @@ This is similar to _character classes_ from regular expressions (`[abc]` matches For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: -``` scheme +```scheme (call_expression function: [ (identifier) @function @@ -569,7 +575,7 @@ In the case of a variable, capture it as `@function`, and in the case of a prope This pattern would match a set of possible keyword tokens, capturing them as `@keyword`: -``` scheme +```scheme [ "break" "delete" @@ -592,7 +598,7 @@ and `_` will match any named or anonymous node. For example, this pattern would match any node inside a call: -``` scheme +```scheme (call (_) @call.inner) ``` @@ -602,7 +608,7 @@ The anchor operator, `.`, is used to constrain the ways in which child patterns When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. 
For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`: -``` scheme +```scheme (array . (identifier) @the-element) ``` @@ -610,13 +616,13 @@ Without this anchor, the pattern would match once for every identifier in the ar Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. The below pattern matches only nodes that are the last named child within a `block`. -``` scheme +```scheme (block (_) @last-expression .) ``` Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`. -``` scheme +```scheme (dotted_name (identifier) @prev-id . @@ -629,20 +635,38 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node #### Predicates -You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. +You can also specify arbitrary metadata and conditions associated with a pattern +by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions +start with a _predicate name_ beginning with a `#` character. After that, they can +contain an arbitrary number of `@`-prefixed capture names or strings. -For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: +Tree-sitter's CLI supports the following predicates by default: -``` scheme -( - (identifier) @constant - (#match? @constant "^[A-Z][A-Z_]+") -) +##### eq?, not-eq?, any-eq?, any-not-eq? + +This family of predicates allows you to match against a single capture or string +value. + +The first argument must be a capture, but the second can be either a capture to +compare the two captures' text, or a string to compare the first capture's text +against. + +The base predicate is "#eq?", but its complement "#not-eq?" can be used to _not_ +match a value. + +Consider the following example targeting C: + +```scheme +((identifier) @variable.builtin + (#eq? @variable.builtin "self")) ``` -And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key: +This pattern would match any identifier that is `self`. + +And this pattern would match key-value pairs where the `value` is an identifier +with the same name as the key: -``` scheme +```scheme ( (pair key: (property_identifier) @key-name @@ -651,7 +675,87 @@ And this pattern would match key-value pairs where the `value` is an identifier ) ``` -_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`. +The prefix "any-" is meant for use with quantified captures.
Here's +an example finding a segment of empty comments: + +```scheme +((comment)+ @comment.empty + (#any-eq? @comment.empty "//")) +``` + +Note that "#any-eq?" will match a quantified capture if +_any_ of the nodes match the predicate, while by default a quantified capture +will only match if _all_ the nodes match the predicate. + +##### match?, not-match?, any-match?, any-not-match? + +These predicates are similar to the eq? predicates, but they use regular expressions +to match against the capture's text. + +The first argument must be a capture, and the second must be a string containing +a regular expression. + +For example, this pattern would match an identifier whose name is written in `SCREAMING_SNAKE_CASE`: + +```scheme +((identifier) @constant + (#match? @constant "^[A-Z][A-Z_]+")) +``` + +Here's an example finding potential documentation comments in C: + +```scheme +((comment)+ @comment.documentation + (#match? @comment.documentation "^///\\s+.*")) +``` + +Here's another example finding Cgo comments to potentially inject with C: + +```scheme +((comment)+ @injection.content + . + (import_declaration + (import_spec path: (interpreted_string_literal) @_import_c)) + (#eq? @_import_c "\"C\"") + (#match? @injection.content "^//")) +``` + +##### any-of?, not-any-of? + +The "any-of?" predicate allows you to match a capture against multiple strings, +and will match if the capture's text is equal to any of the strings. + +Consider this example that targets JavaScript: + +```scheme +((identifier) @variable.builtin + (#any-of? @variable.builtin + "arguments" + "module" + "console" + "window" + "document")) +``` + +This will match any of the builtin variables in JavaScript. + +_Note_ — Predicates are not handled directly by the Tree-sitter C library. +They are just exposed in a structured form so that higher-level code can perform +the filtering. However, higher-level bindings to Tree-sitter like +[the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) +or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +do implement a few common predicates like the `#eq?`, `#match?`, and `#any-of?` +predicates explained above. + +To recap, these are the predicates Tree-sitter's bindings support: + +- `#eq?` checks for a direct match against a capture or string +- `#match?` checks for a match against a regular expression +- `#any-of?` checks for a match against a list of strings +- Adding `not-` to the beginning of any of these predicates will negate the match +- By default, a quantified capture will only match if _all_ of the nodes match the predicate +- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate + ### The Query API diff --git a/docs/section-3-creating-parsers.md b/docs/section-3-creating-parsers.md index 5677292..6439abc 100644 --- a/docs/section-3-creating-parsers.md +++ b/docs/section-3-creating-parsers.md @@ -5,7 +5,7 @@ permalink: creating-parsers # Creating parsers -Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even zen-like. This document will help get you to get started and to develop a useful mental model. +Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even zen-like. This document will help you to get started and to develop a useful mental model.
## Getting Started @@ -20,7 +20,7 @@ In order to develop a Tree-sitter parser, there are two dependencies that you ne To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: -* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works on any platform. See [the contributing docs](/docs/section-5-contributing.md#developing-tree-sitter) for more information. +* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works on any platform. See [the contributing docs](./contributing#developing-tree-sitter) for more information. * Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This approach is fast, but it only works on certain platforms, because it relies on pre-built binaries. * Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`. @@ -46,7 +46,7 @@ npm install --save nan npm install --save-dev tree-sitter-cli ``` -The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your your `PATH` so that you can easily run this program when working in this directory. +The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory. ```sh # In your shell profile script @@ -80,7 +80,9 @@ You can test this parser by creating a source file with the contents "hello" and echo 'hello' > example-file tree-sitter parse example-file ``` + Alternatively, in Windows PowerShell: + ```pwsh "hello" | Out-File example-file -Encoding utf8 tree-sitter parse example-file @@ -88,7 +90,7 @@ tree-sitter parse example-file This should print the following: -``` +```text (source_file [0, 0] - [1, 0]) ``` @@ -102,26 +104,79 @@ Let's go over all of the functionality of the `tree-sitter` command line tool. The most important command you'll use is `tree-sitter generate`. This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again. -The first time you run `tree-sitter generate`, it will also generate a few other files: +The first time you run `tree-sitter generate`, it will also generate a few other files for bindings for the following languages: + +#### C/C++ + +* `Makefile` - This file tells `make` how to compile your language. +* `bindings/c/tree-sitter-language.h` - This file provides the C interface of your language. +* `bindings/c/tree-sitter-language.pc` - This file provides pkg-config metadata about your language's C library. +* `src/tree_sitter/parser.h` - This file provides some basic C definitions that are used in your generated `parser.c` file. +* `src/tree_sitter/alloc.h` - This file provides some memory allocation macros that are to be used in your external scanner, if you have one. +* `src/tree_sitter/array.h` - This file provides some array macros that are to be used in your external scanner, if you have one.
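As a quick illustration of how these C artifacts fit together, here is a minimal sketch (this program is not generated by the CLI, and `my_language` is a placeholder; the actual function name follows your grammar's name) of parsing a string from C:

```c
// Minimal sketch: consuming a generated parser from C.
// Assumes a grammar named "my_language".
#include <tree_sitter/api.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const TSLanguage *tree_sitter_my_language(void);

int main(void) {
  TSParser *parser = ts_parser_new();
  ts_parser_set_language(parser, tree_sitter_my_language());

  const char *source = "hello";
  TSTree *tree = ts_parser_parse_string(parser, NULL, source, strlen(source));

  // Print the syntax tree as an S-expression; the caller frees the string.
  char *sexp = ts_node_string(ts_tree_root_node(tree));
  printf("%s\n", sexp);

  free(sexp);
  ts_tree_delete(tree);
  ts_parser_delete(parser);
  return 0;
}
```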
+* `src/tree_sitter/array.h` - This file provides some array macros that are to be used in your external scanner, if you have one. + +#### Go + +* `bindings/go/binding.go` - This file wraps your language in a Go module. +* `bindings/go/binding_test.go` - This file contains a test for the Go package. + +#### Node * `binding.gyp` - This file tells Node.js how to compile your language. * `bindings/node/index.js` - This is the file that Node.js initially loads when using your language. -* `bindings/node/binding.cc` - This file wraps your language in a JavaScript object when used in Node.js. +* `bindings/node/binding.cc` - This file wraps your language in a JavaScript module for Node.js. + +#### Python + +* `pyproject.toml` - This file is the manifest of the Python package. +* `setup.py` - This file tells Python how to compile your language. +* `bindings/python/binding.c` - This file wraps your language in a Python module. +* `bindings/python/tree_sitter_language/__init__.py` - This file tells Python how to load your language. +* `bindings/python/tree_sitter_language/__init__.pyi` - This file provides type hints for your parser when used in Python. +* `bindings/python/tree_sitter_language/py.typed` - This file provides type hints for your parser when used in Python. + +#### Rust + +* `Cargo.toml` - This file is the manifest of the Rust package. * `bindings/rust/lib.rs` - This file wraps your language in a Rust crate when used in Rust. * `bindings/rust/build.rs` - This file wraps the building process for the Rust crate. -* `src/tree_sitter/parser.h` - This file provides some basic C definitions that are used in your generated `parser.c` file. + +#### Swift + +* `Package.swift` - This file tells Swift how to compile your language. +* `bindings/swift/TreeSitterLanguage/language.h` - This file wraps your language in a Swift module when used in Swift. If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and it will exit with a `Unresolved conflict` error message. See below for more information on these errors. +### Command: `build` + +The `build` command compiles your parser into a dynamically-loadable library, either as a shared object (`.so`, `.dylib`, or `.dll`) or as a WASM module. + +You can change the compiler executable via the `CC` environment variable and add extra flags via `CFLAGS`. For macOS or iOS, you can set `MACOSX_DEPLOYMENT_TARGET` or `IPHONEOS_DEPLOYMENT_TARGET` respectively to define the minimum supported version. + +You can specify whether to compile it as a wasm module with the `--wasm`/`-w` flag, and you can opt to use docker or podman to supply emscripten with the `--docker`/`-d` flag. This removes the need to install emscripten on your machine locally. + +You can specify where to output the shared object file (native or WASM) with the `--output`/`-o` flag, which accepts either an absolute path or relative path. Note that if you don't supply this flag, the CLI will attempt to figure out what the language name is based on the parent directory (so building in `tree-sitter-javascript` will resolve to `javascript`) to use for the output file. If it can't figure it out, it will default to `parser`, thus generating `parser.so` or `parser.wasm` in the current working directory. + +Lastly, you can also specify a path to the actual grammar directory, in case you are not currently in one. This is done by providing a path as the first *positional* argument. 
+ +Example: + +```sh +tree-sitter build --wasm --output ./build/parser.wasm tree-sitter-javascript +``` + +Notice how the `tree-sitter-javascript` argument is the first positional argument. + ### Command: `test` The `tree-sitter test` command allows you to easily test that your parser is working correctly. -For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in the `corpus/` or `test/corpus/` directories within your parser's root folder. +For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in the `test/corpus/` directory within your parser's root folder. For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this: -``` +```text ================== Return statements ================== func x() int { return 1; } @@ -147,7 +202,7 @@ func x() int { The expected output section can also *optionally* show the [*field names*][field-names-section] associated with each child node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in the S-expression: -``` +```text (source_file (function_definition name: (identifier) @@ -159,7 +214,7 @@ func x() int { * If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical suffix (in the below example, `|||`) to disambiguate them: -``` +```text ==================||| Basic module ==================||| @@ -179,13 +234,73 @@ increment(n) == n + 1 These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. -By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag: +By default, the `tree-sitter test` command runs all of the tests in your `test/corpus/` folder. To run a particular test, you can use the `-f` flag: ```sh tree-sitter test -f 'Return statements' ``` -The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `corpus` directory. It's typically a good idea to test all of the permutations of each language construct. This increases test coverage, but doubly acquaints readers with a way to examine expected outputs and understand the "edges" of a language. +The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `test/corpus` directory. It's typically a good idea to test all of the permutations of each language construct. This increases test coverage, but doubly acquaints readers with a way to examine expected outputs and understand the "edges" of a language. + +#### Attributes + +Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with a `:`. +A couple of attributes also take in a parameter, which requires the use of parentheses. + +**Note**: If you'd like to supply multiple parameters, e.g. to run tests on multiple platforms or to test multiple languages, you can repeat the attribute on a new line.
+ +The following attributes are available: + +- `:skip` — This attribute will skip the test when running `tree-sitter test`. + This is useful when you want to temporarily disable running a test without deleting it. +- `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain input is invalid without displaying the whole parse tree; as such, you should omit the parse tree below the `---` line. +- `:fail-fast` — This attribute will stop testing additional tests if the test marked with this attribute fails. +- `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for multi-parser repos, such as XML and DTD, or TypeScript and TSX. The default parser will be the first entry in the `tree-sitter` field in the root `package.json`, so having a way to pick a second or even third parser is useful. +- `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`](https://doc.rust-lang.org/std/env/consts/constant.OS.html). + +Examples using attributes: + +```text +========================= +Test that will be skipped +:skip +========================= + +int main() {} + +------------------------- + +==================================== +Test that will run on Linux or macOS + +:platform(linux) +:platform(macos) +==================================== + +int main() {} + +------------------------------------ + +======================================================================== +Test that expects an error, and will fail fast if there's no parse error +:fail-fast +:error +======================================================================== + +int main ( {} + +------------------------------------------------------------------------ + +================================================= +Test that will parse with both TypeScript and TSX +:language(typescript) +:language(tsx) +================================================= + +console.log('Hello, world!'); + +------------------------------------------------- +``` #### Automatic Compilation @@ -199,7 +314,7 @@ The `tree-sitter test` command will *also* run any syntax highlighting tests in You can run your parser on an arbitrary file using `tree-sitter parse`. This will print the resulting syntax tree, including nodes' ranges and field names, like this: -``` +```text (source_file [0, 0] - [3, 0] (function_declaration [0, 0] - [2, 1] name: (identifier [0, 5] - [0, 9]) @@ -227,6 +342,20 @@ The following is a complete list of built-in functions you can use in your `gram * **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING` or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command. * **String and Regex literals** - The terminal symbols in a grammar are described using JavaScript strings and regular expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes; it generates its own regex-matching logic as part of each parser. Regex literals are just used as a convenient way of writing regular expressions in your grammar.
+* **Regex Limitations** - Currently, only a subset of the Regex engine is actually +supported. This is due to certain features, like lookahead and lookaround assertions, +not being feasible to use in an LR(1) grammar, as well as certain flags being unnecessary +for Tree-sitter. However, plenty of features are supported by default: + + * Character classes + * Character ranges + * Character sets + * Quantifiers + * Alternation + * Grouping + * Unicode character escapes + * Unicode property escapes + * **Sequences : `seq(rule1, rule2, ...)`** - This function creates a rule that matches any number of other rules, one after another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf]. * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation. * **Repetitions : `repeat(rule)`** - This function creates a rule that matches *zero-or-more* occurrences of a given rule. It is analogous to the `{x}` (curly brace) syntax in EBNF notation. @@ -236,7 +365,15 @@ The following is a complete list of built-in functions you can use in your `gram * **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars. * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. * **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. -* **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. +* **Tokens : `token(rule)`** - This function marks the given rule as producing only +a single token. Tree-sitter's default is to treat each String or RegExp literal +in the grammar as a separate token. Each token is matched separately by the lexer +and returned as its own leaf node in the tree. The `token` function allows you to +express a complex rule using the functions described above (rather than as a single +regular expression) but still have Tree-sitter treat it as a single token.
+The token function will only accept terminal rules, so `token($.foo)` will not work. +You can think of it as a shortcut for squashing complex rules of strings or regexes +down to a single token. * **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. @@ -251,7 +388,6 @@ In addition to the `name` and `rules` fields, grammars have a few other optional * **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization. * **`supertypes`** - an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types]. - ## Writing the Grammar Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe any given language. In order to produce a good Tree-sitter parser, you need to create a grammar with two important properties: @@ -363,7 +499,7 @@ With this structure in place, you can now freely decide what part of the grammar After developing the *type* sublanguage a bit further, you might decide to switch to working on *statements* or *expressions* instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`. -**And remember to add tests for each rule in your `corpus` folder!** +**And remember to add tests for each rule in your `test/corpus` folder!** ### Structuring Rules Well @@ -375,7 +511,7 @@ return x + y; According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules: -``` +```text ReturnStatement -> 'return' Expression Expression -> AssignmentExpression AssignmentExpression -> ConditionalExpression @@ -432,7 +568,7 @@ To produce a readable syntax tree, we'd like to model JavaScript expressions usi Of course, this flat structure is highly ambiguous.
If we try to generate a parser, Tree-sitter gives us an error message: -``` +```text Error: Unresolved conflict for symbol sequence: '-' _expression • '*' … @@ -468,7 +604,7 @@ For an expression like `-a * b`, it's not clear whether the `-` operator applies Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict: -``` +```text Error: Unresolved conflict for symbol sequence: _expression '*' _expression • '*' … @@ -504,7 +640,7 @@ You may have noticed in the above examples that some of the grammar rule name li ### Using Fields -Often, it's easier to analyze a syntax nodes if you can refer to its children by *name* instead of by their position in an ordered list. Tree-sitter grammars support this using the `field` function. This function allows you to assign unique names to some or all of a node's children: +Often, it's easier to analyze a syntax node if you can refer to its children by *name* instead of by their position in an ordered list. Tree-sitter grammars support this using the `field` function. This function allows you to assign unique names to some or all of a node's children: ```js function_definition: $ => seq( @@ -526,29 +662,21 @@ Tree-sitter's parsing process is divided into two phases: parsing (which is desc Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways. -1. **External Scanning** - If your grammar has an external scanner and one or more tokens in your `externals` array are valid at the current location, your external scanner will always be called first to determine whether those tokens are present. +1. **Context-aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. -1. **Context-Aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. +2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used *within* the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. -1. **Earliest Starting Position** - Tree-sitter will prefer tokens with an earlier starting position. This is most often seen with very permissive regular expressions similar to `/.*/`, which are greedy and will consume as much text as possible. In this example the regex would consume all text until hitting a newline - even if text on that line could be interpreted as a different token. +3. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. -1. **Explicit Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. 
If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. -1. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. +4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. -1. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. +5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. -1. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. +If there is an external scanner, it may have [an additional impact](#other-external-scanner-details) on regular tokens defined in the grammar. ### Lexical Precedence vs. Parse Precedence -One common mistake involves not distinguishing lexical precedence from parse precedence. -Parse precedence determines which rule is chosen to interpret a given sequence of tokens. -Lexical precedence determines which token is chosen to interpret a given section of text. -It is a lower-level operation that is done first. -The above list fully capture tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. -Most of the time when you really get stuck, you're dealing with a lexical precedence problem. -Pay particular attention to the difference in meaning between using `prec` inside the `token` function versus outside of it. +One common mistake involves not distinguishing *lexical precedence* from *parse precedence*. Parse precedence determines which rule is chosen to interpret a given sequence of tokens. *Lexical precedence* determines which token is chosen to interpret the text at a given position, and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. Most of the time when you really get stuck, you're dealing with a lexical precedence problem. Pay particular attention to the difference in meaning between using `prec` inside of the `token` function versus outside of it. The *lexical precedence* syntax is `token(prec(N, ...))`. ### Keywords @@ -587,7 +715,7 @@ grammar({ ), binary_expression: $ => choice( - prec.left(1, seq($._expression, 'instanceof', $._expression) + prec.left(1, seq($._expression, 'instanceof', $._expression)) // ... ), @@ -608,6 +736,7 @@ Aside from improving error detection, keyword extraction also has performance be ### External Scanners Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression.
Some examples: + * [Indent and dedent][indent-tokens] tokens in Python * [Heredocs][heredoc] in Bash and Ruby * [Percent strings][percent-string] in Ruby @@ -632,10 +761,19 @@ grammar({ Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js, and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate. +> **Note** +> +> C++ scanners are now deprecated and will be removed in the near future. +> While it is currently possible to write an external scanner in C++, it can be difficult +> to get working cross-platform and introduces extra requirements; therefore it +> is *greatly* preferred to use C. + In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter. ```c -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" +#include "tree_sitter/alloc.h" +#include "tree_sitter/array.h" enum TokenType { INDENT, @@ -649,14 +787,13 @@ Finally, you must define five functions with specific names, based on your langu #### Create ```c -void * tree_sitter_my_language_external_scanner_create() { +void *tree_sitter_my_language_external_scanner_create(void) { // ... } ``` This function should create your scanner object. It will only be called once, when your language is set on a parser. Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to maintain any state, it's ok to return `NULL`. - #### Destroy ```c @@ -716,13 +853,13 @@ This function is responsible for recognizing external tokens. It should return ` * **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range associated with tokens emitted by the external scanner. * **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token. * **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line. -* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. +* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. +* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed.
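Putting these pieces together, here is a minimal sketch of a `scan` function, assuming a single hypothetical external token `NEWLINE` declared in the grammar's `externals` array (the `valid_symbols` argument is explained just below):

```c
// A minimal sketch of a scan function. NEWLINE is a hypothetical
// external token; real scanners usually handle several tokens.
bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  if (valid_symbols[NEWLINE] && lexer->lookahead == '\n') {
    lexer->advance(lexer, false);   // consume the newline character
    lexer->result_symbol = NEWLINE; // report which token was recognized
    return true;
  }
  return false; // no external token was recognized at this position
}
```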
-The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. +The third argument to the `scan` function is an array of booleans that indicates which of the external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. ```c -if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { +if (valid_symbols[INDENT] || valid_symbols[DEDENT]) { // ... logic that is common to both `INDENT` and `DEDENT` @@ -736,23 +873,124 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { } ``` +#### External Scanner Helpers + +##### Allocator + +Instead of using libc's `malloc`, `calloc`, `realloc`, and `free`, you should use the versions prefixed with `ts_` from `tree_sitter/alloc.h`. +These macros can allow a potential consumer to override the default allocator with their own implementation, but by default will use the libc functions. + +As a consumer of the tree-sitter core library as well as any parser libraries that might use allocations, you can enable overriding the default allocator and have it use the same one as the library allocator, which you can set with `ts_set_allocator`. +To enable this overriding in scanners, you must compile them with the `TREE_SITTER_REUSE_ALLOCATOR` macro defined, and the tree-sitter library must be linked into your final application dynamically, since it needs to resolve the internal functions at runtime. If you are compiling +an executable binary that uses the core library, but want to load parsers dynamically at runtime, then you will have to use a special linker flag on Unix. For non-Darwin systems, that would be `--dynamic-list` and for Darwin systems, that would be `-exported_symbols_list`. +The CLI does exactly this, so you can use it as a reference (check out `cli/build.rs`). + +For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so as in the following example: + +```c +#include "tree_sitter/parser.h" +#include "tree_sitter/alloc.h" + +// ... + +void *tree_sitter_my_language_external_scanner_create(void) { + return ts_calloc(100, 1); // or ts_malloc(100) +} + +// ... + +```
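On the application side, installing a custom allocator is a single call to `ts_set_allocator`, made before any parsers or trees are created. Here is a minimal sketch (the `tracing_*` functions are hypothetical stand-ins for your own implementation):

```c
#include <tree_sitter/api.h>
#include <stdio.h>
#include <stdlib.h>

// Hypothetical tracing wrappers standing in for a real custom allocator.
static void *tracing_malloc(size_t size) {
  fprintf(stderr, "ts_malloc(%zu)\n", size);
  return malloc(size);
}
static void *tracing_calloc(size_t count, size_t size) {
  return calloc(count, size);
}
static void *tracing_realloc(void *ptr, size_t size) {
  return realloc(ptr, size);
}
static void tracing_free(void *ptr) {
  free(ptr);
}

int main(void) {
  // Must be called before any parsers, trees, or queries are created.
  ts_set_allocator(tracing_malloc, tracing_calloc, tracing_realloc, tracing_free);
  // ... use the library as usual ...
  return 0;
}
```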
+ +##### Arrays + +If you need to use array-like types in your scanner, such as tracking a stack of indentations or tags, you should use the array macros from `tree_sitter/array.h`. + +There are quite a few of them provided for you, but here's how you could get started tracking some state. Check out the header itself for more detailed documentation. + +**NOTE**: Do not use any of the array functions or macros that are prefixed with an underscore and have comments saying that it is not what you are looking for. +These are internal functions used as helpers by other macros that are public, and they are not meant to be used directly. + +```c +#include "tree_sitter/parser.h" +#include "tree_sitter/array.h" + +enum TokenType { + INDENT, + DEDENT, + NEWLINE, + STRING, +}; + +// Create the array in your create function + +void *tree_sitter_my_language_external_scanner_create(void) { + return ts_calloc(1, sizeof(Array(int))); + + // or if you want to zero out the memory yourself + + Array(int) *stack = ts_malloc(sizeof(Array(int))); + array_init(stack); + return stack; +} + +bool tree_sitter_my_language_external_scanner_scan( + void *payload, + TSLexer *lexer, + const bool *valid_symbols +) { + Array(int) *stack = payload; + if (valid_symbols[INDENT]) { + array_push(stack, lexer->get_column(lexer)); + lexer->result_symbol = INDENT; + return true; + } + if (valid_symbols[DEDENT]) { + array_pop(stack); // this returns the popped element by value, but we don't need it + lexer->result_symbol = DEDENT; + return true; + } + + // we can also use an array on the stack to keep track of a string + + Array(char) next_string = array_new(); + + if (valid_symbols[STRING] && lexer->lookahead == '"') { + lexer->advance(lexer, false); + while (lexer->lookahead != '"' && lexer->lookahead != '\n' && !lexer->eof(lexer)) { + array_push(&next_string, lexer->lookahead); + lexer->advance(lexer, false); + } + + // assume we have some arbitrary constraint of not having more than 100 characters in a string + if (lexer->lookahead == '"' && next_string.size <= 100) { + lexer->advance(lexer, false); + lexer->result_symbol = STRING; + return true; + } + } + + return false; +} + +``` + #### Other External Scanner Details -If a token in your `externals` array is valid at the current position in the parse, your external scanner will be called first before anything else is done. -This means your external scanner functions as a powerful override of tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence. +If a token in the `externals` array is valid at a given position in the parse, the external scanner will be called first before anything else is done. This means the external scanner functions as a powerful override of Tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence. + +If a syntax error is encountered during regular parsing, Tree-sitter's first action during error recovery will be to call the external scanner's `scan` function with all tokens marked valid. The scanner should detect this case and handle it appropriately. One simple method of detection is to add an unused token to the end of the `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether Tree-sitter is in error correction mode.
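A minimal sketch of that sentinel technique (the token names are hypothetical; the sentinel is never valid during normal parsing):

```c
enum TokenType {
  TOKEN_1,
  TOKEN_2,
  ERROR_SENTINEL, // listed last in `externals`, never used in any rule
};

bool tree_sitter_my_language_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  // During error recovery Tree-sitter marks every token valid, including
  // the sentinel, so this branch only runs in error correction mode.
  if (valid_symbols[ERROR_SENTINEL]) {
    return false; // or run special error-recovery logic here
  }
  // ... normal scanning logic for TOKEN_1 and TOKEN_2 ...
  return false;
}
```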
+
+If you put terminal keywords in the `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in the grammar they will be tokenized by the external scanner. It is similar to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` and then using `alias($.if_keyword, 'if')` in the grammar.
 
-If a syntax error is encountered during regular parsing, tree-sitter's first action during error recovery will be to call your external scanner's `scan` function with all tokens marked valid.
-Your scanner should detect this case and handle it appropriately.
-One simple method of detection is to add an unused token to the end of your `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether tree-sitter is in error correction mode.
+If you use literal keywords in the `externals` array, lexing works in two steps: the external scanner is called first, and if it sets a resulting token and returns `true`, the token is considered recognized and Tree-sitter moves on to the next token. But the external scanner may return `false`, in which case Tree-sitter falls back to the internal lexing mechanism.
 
-If you put terminal keywords in your `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in your grammar they will be tokenized by your external scanner.
-It is equivalent to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` then using `alias($.if_keyword, 'if')` in your grammar.
+If a keyword is defined in the `externals` array only in rule-referencing form, such as `$.if_keyword`, and there is no additional definition of that rule among the grammar rules (e.g., `if_keyword: $ => 'if'`), then falling back to the internal lexer is not possible, because Tree-sitter does not know the actual keyword, and it is entirely the external scanner's responsibility to recognize such tokens.
 
 External scanners are a common cause of infinite loops. Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop, be sure to use the `eof` function to check whether you are at the end of the file.
 
 [ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
-[antlr]: http://www.antlr.org/
+[antlr]: https://www.antlr.org
 [bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
 [bison]: https://en.wikipedia.org/wiki/GNU_bison
 [c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage
diff --git a/docs/section-4-syntax-highlighting.md b/docs/section-4-syntax-highlighting.md
index a6e5d74..818172f 100644
--- a/docs/section-4-syntax-highlighting.md
+++ b/docs/section-4-syntax-highlighting.md
@@ -9,8 +9,6 @@ Syntax highlighting is a very common feature in applications that deal with code
 
 This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files.
-**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document. Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.**
-
 ## Overview
 
 All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed:
@@ -27,9 +25,9 @@ The Tree-sitter CLI automatically creates two directories in your home folder.
 
 These directories are created in the "normal" place for your platform:
 
-- On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter`
-- On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter`
-- On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter`
+* On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter`
+* On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter`
+* On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter`
 
 The CLI will work if there's no config file present, falling back on default values for each configuration option. To create a config file that you can edit, run this command:
 
@@ -63,6 +61,7 @@ In your config file, the `"theme"` value is an object whose keys are dot-separat
 
 #### Highlight Names
 
 A theme can contain multiple keys that share a common subsequence. Examples:
+
 * `variable` and `variable.parameter`
 * `function`, `function.builtin`, and `function.method`
 
@@ -93,6 +92,11 @@ These keys specify basic information about the parser:
 
 * `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden.
 
+* `external-files` (optional) - A list of relative paths from the root dir of a
+parser to files that should be checked for modifications during recompilation.
+This is useful during development so that changes to files other than `scanner.c`
+are picked up by the CLI.
+
 ### Language Detection
 
 These keys help to decide whether the language applies to a given file:
@@ -160,7 +164,7 @@ func increment(a int) int {
 
 With this syntax tree:
 
-```
+```scheme
 (source_file
   (function_declaration
     name: (identifier)
@@ -180,6 +184,7 @@ With this syntax tree:
 
 #### Example Query
 
 Suppose we wanted to render this code with the following colors:
+
 * keywords `func` and `return` in purple
 * function `increment` in blue
 * type `int` in green
@@ -187,7 +192,7 @@ Suppose we wanted to render this code with the following colors:
 
 We can assign each of these categories a *highlight name* using a query like this:
 
-```
+```scheme
 ; highlights.scm
 
 "func" @keyword
@@ -254,7 +259,7 @@ list = [item]
 
 With this syntax tree:
 
-```
+```scheme
 (program
   (method
     name: (identifier)
@@ -297,7 +302,7 @@ There are several different types of names within this method:
 
 Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, as described in the previous section. We'll assign distinct colors to method calls, method definitions, and formal parameters:
 
-```
+```scheme
 ; highlights.scm
 
 (call method: (identifier) @function.method)
@@ -314,7 +319,7 @@ Let's write some queries that let us clearly distinguish between these types of
 
 Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*:
 
-```
+```scheme
 ; locals.scm
 
 (method) @local.scope
@@ -347,6 +352,7 @@ Running `tree-sitter highlight` on this ruby file would produce output like this
 
 ### Language Injection
 
 Some source files contain code written in multiple different languages. Examples include:
+
 * HTML files, which can contain JavaScript inside of `<script>` tags
diff --git a/docs/section-8-code-navigation-systems.md b/docs/section-8-code-navigation-systems.md
index a1b6a28..04346e4 100644
--- a/docs/section-8-code-navigation-systems.md
+++ b/docs/section-8-code-navigation-systems.md
@@ -9,7 +9,7 @@ Tree-sitter can be used in conjunction with its [tree query language](https://tr
 
 ## Tagging and captures
 
-*Tagging* is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name.
+_Tagging_ is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name.
 
 The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched (i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used (i.e. whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name of a given identifier.
 
@@ -19,14 +19,14 @@ You may optionally include a capture named `@doc` to bind a docstring. For conve
 
 This [query](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/queries/tags.scm#L4-L5) recognizes Python function definitions and captures their declared name.
The `function_definition` syntax node is defined in the [Python Tree-sitter grammar](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/grammar.js#L354). -``` scheme +```scheme (function_definition name: (identifier) @name) @definition.function ``` A more sophisticated query can be found in the [JavaScript Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/queries/tags.scm#L63-L70): -``` scheme +```scheme (assignment_expression left: [ (identifier) @name @@ -39,7 +39,7 @@ A more sophisticated query can be found in the [JavaScript Tree-sitter repositor An even more sophisticated query is in the [Ruby Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-ruby/blob/1ebfdb288842dae5a9233e2509a135949023dd82/queries/tags.scm#L24-L43), which uses built-in functions to strip the Ruby comment character (`#`) from the docstrings associated with a class or singleton-class declaration, then selects only the docstrings adjacent to the node matched as `@definition.class`. -``` scheme +```scheme ( (comment)* @doc . @@ -79,7 +79,7 @@ The below table describes a standard vocabulary for kinds and roles during the t You can use the `tree-sitter tags` command to test out a tags query file, passing as arguments one or more files to tag. We can run this tool from within the Tree-sitter Ruby repository, over code in a file called `test.rb`: -``` ruby +```ruby module Foo class Bar # won't be included @@ -93,7 +93,7 @@ end Invoking `tree-sitter tags test.rb` produces the following console output, representing matched entities' name, role, location, first line, and docstring: -``` +```text test.rb Foo | module def (0, 7) - (0, 10) `module Foo` Bar | class def (1, 8) - (1, 11) `class Bar` diff --git a/highlight/Cargo.toml b/highlight/Cargo.toml index e85ced8..694f506 100644 --- a/highlight/Cargo.toml +++ b/highlight/Cargo.toml @@ -1,26 +1,26 @@ [package] name = "tree-sitter-highlight" +version.workspace = true description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.20.1" authors = [ "Max Brunsfeld ", "Tim Clem ", ] -license = "MIT" +edition.workspace = true +rust-version.workspace = true readme = "README.md" -edition = "2018" +homepage.workspace = true +repository.workspace = true +license.workspace = true keywords = ["incremental", "parsing", "syntax", "highlighting"] categories = ["parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter" -rust-version.workspace = true [lib] crate-type = ["lib", "staticlib"] [dependencies] -regex = "1" -thiserror = "1.0" +lazy_static.workspace = true +regex.workspace = true +thiserror.workspace = true -[dependencies.tree-sitter] -version = "0.20" -path = "../lib" +tree-sitter.workspace = true diff --git a/highlight/README.md b/highlight/README.md index e8a5d06..982e510 100644 --- a/highlight/README.md +++ b/highlight/README.md @@ -1,22 +1,25 @@ -# `tree-sitter-highlight` +# Tree-sitter Highlight -[![Crates.io](https://img.shields.io/crates/v/tree-sitter-highlight.svg)](https://crates.io/crates/tree-sitter-highlight) +[![crates.io badge]][crates.io] -### Usage +[crates.io]: https://crates.io/crates/tree-sitter-highlight +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723 -Add this crate, and the language-specific crates for whichever languages you want to parse, to your `Cargo.toml`: +## Usage + +Add this crate, and the language-specific crates for 
whichever languages you want
+to parse, to your `Cargo.toml`:
 
 ```toml
 [dependencies]
-tree-sitter-highlight = "0.19"
-tree-sitter-html = "0.19"
-tree-sitter-javascript = "0.19"
+tree-sitter-highlight = "^0.21.0"
+tree-sitter-javascript = "0.20.3"
 ```
 
 Define the list of highlight names that you will recognize:
 
 ```rust
-let highlight_names = &[
+let highlight_names = [
     "attribute",
     "constant",
     "function.builtin",
@@ -38,34 +41,29 @@ let highlight_names = &[
 ];
 ```
 
-Create a highlighter. You need one of these for each thread that you're using for syntax highlighting:
+Create a highlighter. You need one of these for each thread that you're using for
+syntax highlighting:
 
 ```rust
 use tree_sitter_highlight::Highlighter;
 
-let highlighter = Highlighter::new();
+let mut highlighter = Highlighter::new();
 ```
 
-Load some highlighting queries from the `queries` directory of some language repositories:
+Load some highlighting queries from the `queries` directory of the language repository:
 
 ```rust
 use tree_sitter_highlight::HighlightConfiguration;
 
-let html_language = unsafe { tree_sitter_html() };
-let javascript_language = unsafe { tree_sitter_javascript() };
-
-let html_config = HighlightConfiguration::new(
-    tree_sitter_html::language(),
-    tree_sitter_html::HIGHLIGHTS_QUERY,
-    tree_sitter_html::INJECTIONS_QUERY,
-    "",
-).unwrap();
+let javascript_language = tree_sitter_javascript::language();
 
-let javascript_config = HighlightConfiguration::new(
-    tree_sitter_javascript::language(),
-    tree_sitter_javascript::HIGHLIGHTS_QUERY,
-    tree_sitter_javascript::INJECTIONS_QUERY,
-    tree_sitter_javascript::LCOALS_QUERY,
+let mut javascript_config = HighlightConfiguration::new(
+    javascript_language,
+    "javascript",
+    tree_sitter_javascript::HIGHLIGHT_QUERY,
+    tree_sitter_javascript::INJECTION_QUERY,
+    tree_sitter_javascript::LOCALS_QUERY,
 ).unwrap();
 ```
 
@@ -102,4 +100,6 @@ for event in highlights {
 }
 ```
 
-The last parameter to `highlight` is a *language injection* callback. This allows other languages to be retrieved when Tree-sitter detects an embedded document (for example, a piece of JavaScript code inside of a `script` tag within HTML).
+The last parameter to `highlight` is a _language injection_ callback. This allows
+other languages to be retrieved when Tree-sitter detects an embedded document
+(for example, a piece of JavaScript code inside a `script` tag within HTML).
diff --git a/highlight/include/tree_sitter/highlight.h b/highlight/include/tree_sitter/highlight.h
index 496faea..ecf45a7 100644
--- a/highlight/include/tree_sitter/highlight.h
+++ b/highlight/include/tree_sitter/highlight.h
@@ -40,6 +40,7 @@ void ts_highlighter_delete(TSHighlighter *);
 // written in a different language.
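+// The `language_name` parameter identifies this language when resolving the
+// `injection.self` and `injection.parent` query properties.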
 TSHighlightError ts_highlighter_add_language(
   TSHighlighter *self,
+  const char *language_name,
   const char *scope_name,
   const char *injection_regex,
   const TSLanguage *language,
diff --git a/highlight/src/c_lib.rs b/highlight/src/c_lib.rs
index d48a180..bf291c9 100644
--- a/highlight/src/c_lib.rs
+++ b/highlight/src/c_lib.rs
@@ -1,13 +1,13 @@
-use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer};
+use std::{
+    collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str,
+    sync::atomic::AtomicUsize,
+};
+
 use regex::Regex;
-use std::collections::HashMap;
-use std::ffi::CStr;
-use std::os::raw::c_char;
-use std::process::abort;
-use std::sync::atomic::AtomicUsize;
-use std::{fmt, slice, str};
 use tree_sitter::Language;
 
+use super::{Error, Highlight, HighlightConfiguration, Highlighter, HtmlRenderer};
+
 pub struct TSHighlighter {
     languages: HashMap<String, (Option<Regex>, HighlightConfiguration)>,
     attribute_strings: Vec<&'static [u8]>,
@@ -29,25 +29,30 @@ pub enum ErrorCode {
     InvalidUtf8,
     InvalidRegex,
     InvalidQuery,
+    InvalidLanguageName,
 }
 
+/// Create a new [`TSHighlighter`] instance.
+///
+/// # Safety
+///
+/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for
+/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null.
 #[no_mangle]
-pub extern "C" fn ts_highlighter_new(
+pub unsafe extern "C" fn ts_highlighter_new(
     highlight_names: *const *const c_char,
     attribute_strings: *const *const c_char,
     highlight_count: u32,
 ) -> *mut TSHighlighter {
-    let highlight_names =
-        unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) };
-    let attribute_strings =
-        unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) };
+    let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize);
+    let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize);
     let highlight_names = highlight_names
-        .into_iter()
-        .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() })
+        .iter()
+        .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string())
         .collect::<Vec<_>>();
     let attribute_strings = attribute_strings
-        .into_iter()
-        .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() })
+        .iter()
+        .map(|s| CStr::from_ptr(*s).to_bytes())
         .collect();
     let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return");
     Box::into_raw(Box::new(TSHighlighter {
@@ -58,9 +63,21 @@ pub extern "C" fn ts_highlighter_new(
     }))
 }
 
+/// Add a language to a [`TSHighlighter`] instance.
+///
+/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not.
+///
+/// # Safety
+///
+/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
+/// created by [`ts_highlighter_new`].
+///
+/// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the
+/// lifetime of the [`TSHighlighter`] instance, and are non-null.
 #[no_mangle]
-pub extern "C" fn ts_highlighter_add_language(
+pub unsafe extern "C" fn ts_highlighter_add_language(
     this: *mut TSHighlighter,
+    language_name: *const c_char,
     scope_name: *const c_char,
     injection_regex: *const c_char,
     language: Language,
@@ -73,7 +90,7 @@ pub extern "C" fn ts_highlighter_add_language(
 ) -> ErrorCode {
     let f = move || {
         let this = unwrap_mut_ptr(this);
-        let scope_name = unsafe { CStr::from_ptr(scope_name) };
+        let scope_name = CStr::from_ptr(scope_name);
         let scope_name = scope_name
             .to_str()
             .or(Err(ErrorCode::InvalidUtf8))?
@@ -81,38 +98,44 @@ pub extern "C" fn ts_highlighter_add_language(
         let injection_regex = if injection_regex.is_null() {
             None
         } else {
-            let pattern = unsafe { CStr::from_ptr(injection_regex) };
+            let pattern = CStr::from_ptr(injection_regex);
             let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
             Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
         };
 
-        let highlight_query = unsafe {
-            slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize)
-        };
+        let highlight_query =
+            slice::from_raw_parts(highlight_query.cast::<u8>(), highlight_query_len as usize);
+
         let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?;
 
         let injection_query = if injection_query_len > 0 {
-            let query = unsafe {
-                slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize)
-            };
+            let query =
+                slice::from_raw_parts(injection_query.cast::<u8>(), injection_query_len as usize);
             str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
         } else {
             ""
         };
 
         let locals_query = if locals_query_len > 0 {
-            let query = unsafe {
-                slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize)
-            };
+            let query = slice::from_raw_parts(locals_query.cast::<u8>(), locals_query_len as usize);
             str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
         } else {
             ""
         };
 
-        let mut config =
-            HighlightConfiguration::new(language, highlight_query, injection_query, locals_query)
-                .or(Err(ErrorCode::InvalidQuery))?;
-        config.configure(&this.highlight_names.as_slice());
+        let lang = CStr::from_ptr(language_name)
+            .to_str()
+            .or(Err(ErrorCode::InvalidLanguageName))?;
+
+        let mut config = HighlightConfiguration::new(
+            language,
+            lang,
+            highlight_query,
+            injection_query,
+            locals_query,
+        )
+        .or(Err(ErrorCode::InvalidQuery))?;
+        config.configure(this.highlight_names.as_slice());
 
         this.languages.insert(scope_name, (injection_regex, config));
 
         Ok(())
@@ -132,42 +155,103 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
     }))
 }
 
+/// Deletes a [`TSHighlighter`] instance.
+///
+/// # Safety
+///
+/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance
+/// created by [`ts_highlighter_new`].
+///
+/// It cannot be used after this function is called.
 #[no_mangle]
-pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
-    drop(unsafe { Box::from_raw(this) })
+pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
    drop(Box::from_raw(this));
 }
 
+/// Deletes a [`TSHighlightBuffer`] instance.
+///
+/// # Safety
+///
+/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance
+/// created by [`ts_highlight_buffer_new`]
+///
+/// It cannot be used after this function is called.
#[no_mangle] -pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { - drop(unsafe { Box::from_raw(this) }) +pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { + drop(Box::from_raw(this)); } +/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. +/// +/// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance, +/// else the data will point to garbage. +/// +/// To get the length of the HTML content, use [`ts_highlight_buffer_len`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { +pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.renderer.html.as_slice().as_ptr() } +/// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. +/// +/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`] +/// instance, else the data will point to garbage. +/// +/// To get the length of the array, use [`ts_highlight_buffer_line_count`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( + this: *const TSHighlightBuffer, +) -> *const u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.as_slice().as_ptr() } +/// Get the length of the HTML content of a [`TSHighlightBuffer`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.html.len() as u32 } +/// Get the number of lines in a [`TSHighlightBuffer`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.len() as u32 } +/// Highlight a string of source code. +/// +/// # Safety +/// +/// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are +/// valid for the lifetime of the [`TSHighlighter`] instance, and are non-null. 
+///
+/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by
+/// [`ts_highlighter_new`]
 #[no_mangle]
-pub extern "C" fn ts_highlighter_highlight(
+pub unsafe extern "C" fn ts_highlighter_highlight(
     this: *const TSHighlighter,
     scope_name: *const c_char,
     source_code: *const c_char,
@@ -177,10 +261,9 @@
 ) -> ErrorCode {
     let this = unwrap_ptr(this);
     let output = unwrap_mut_ptr(output);
-    let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() });
-    let source_code =
-        unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) };
-    let cancellation_flag = unsafe { cancellation_flag.as_ref() };
+    let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
+    let source_code = slice::from_raw_parts(source_code.cast::<u8>(), source_code_len as usize);
+    let cancellation_flag = cancellation_flag.as_ref();
     this.highlight(source_code, scope_name, output, cancellation_flag)
 }
 
@@ -225,15 +308,8 @@ impl TSHighlighter {
             .renderer
             .render(highlights, source_code, &|s| self.attribute_strings[s.0]);
         match result {
-            Err(Error::Cancelled) => {
-                return ErrorCode::Timeout;
-            }
-            Err(Error::InvalidLanguage) => {
-                return ErrorCode::InvalidLanguage;
-            }
-            Err(Error::Unknown) => {
-                return ErrorCode::Timeout;
-            }
+            Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout,
+            Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage,
             Ok(()) => ErrorCode::Ok,
         }
     } else {
@@ -242,15 +318,15 @@
     }
 }
 
-fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
-    unsafe { result.as_ref() }.unwrap_or_else(|| {
+unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T {
+    result.as_ref().unwrap_or_else(|| {
         eprintln!("{}:{} - pointer must not be null", file!(), line!());
         abort();
     })
 }
 
-fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
-    unsafe { result.as_mut() }.unwrap_or_else(|| {
+unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
+    result.as_mut().unwrap_or_else(|| {
         eprintln!("{}:{} - pointer must not be null", file!(), line!());
         abort();
     })
@@ -258,7 +334,7 @@ fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
 
 fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
     result.unwrap_or_else(|error| {
-        eprintln!("tree-sitter highlight error: {}", error);
+        eprintln!("tree-sitter highlight error: {error}");
         abort();
     })
 }
diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs
index 8a79c62..a7a2336 100644
--- a/highlight/src/lib.rs
+++ b/highlight/src/lib.rs
@@ -1,9 +1,14 @@
+#![doc = include_str!("../README.md")]
+
 pub mod c_lib;
-pub mod util;
-pub use c_lib as c;
+use std::{
+    collections::HashSet,
+    iter, mem, ops, str,
+    sync::atomic::{AtomicUsize, Ordering},
+};
 
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::{iter, mem, ops, str, usize};
+pub use c_lib as c;
+use lazy_static::lazy_static;
 use thiserror::Error;
 use tree_sitter::{
     Language, LossyUtf8, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
@@ -14,6 +19,65 @@
 const CANCELLATION_CHECK_INTERVAL: usize = 100;
 const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024;
 const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000;
 
+lazy_static! {
+    static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![
+        "attribute",
+        "boolean",
+        "carriage-return",
+        "comment",
+        "comment.documentation",
+        "constant",
+        "constant.builtin",
+        "constructor",
+        "constructor.builtin",
+        "embedded",
+        "error",
+        "escape",
+        "function",
+        "function.builtin",
+        "keyword",
+        "markup",
+        "markup.bold",
+        "markup.heading",
+        "markup.italic",
+        "markup.link",
+        "markup.link.url",
+        "markup.list",
+        "markup.list.checked",
+        "markup.list.numbered",
+        "markup.list.unchecked",
+        "markup.list.unnumbered",
+        "markup.quote",
+        "markup.raw",
+        "markup.raw.block",
+        "markup.raw.inline",
+        "markup.strikethrough",
+        "module",
+        "number",
+        "operator",
+        "property",
+        "property.builtin",
+        "punctuation",
+        "punctuation.bracket",
+        "punctuation.delimiter",
+        "punctuation.special",
+        "string",
+        "string.escape",
+        "string.regexp",
+        "string.special",
+        "string.special.symbol",
+        "tag",
+        "type",
+        "type.builtin",
+        "variable",
+        "variable.builtin",
+        "variable.member",
+        "variable.parameter",
+    ]
+    .into_iter()
+    .collect();
+}
+
 /// Indicates which highlight should be applied to a region of source code.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct Highlight(pub usize);
@@ -42,6 +106,7 @@ pub enum HighlightEvent {
 /// This struct is immutable and can be shared between threads.
 pub struct HighlightConfiguration {
     pub language: Language,
+    pub language_name: String,
     pub query: Query,
     combined_injections_query: Option<Query>,
     locals_pattern_index: usize,
@@ -62,7 +127,7 @@ pub struct HighlightConfiguration {
 /// syntax highlighting calls. A separate highlighter is needed for each thread that
 /// is performing highlighting.
 pub struct Highlighter {
-    parser: Parser,
+    pub parser: Parser,
     cursors: Vec<QueryCursor>,
 }
 
@@ -92,6 +157,7 @@ where
     F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a,
 {
     source: &'a [u8],
+    language_name: &'a str,
     byte_offset: usize,
     highlighter: &'a mut Highlighter,
     injection_callback: F,
@@ -105,7 +171,7 @@ where
 struct HighlightIterLayer<'a> {
     _tree: Tree,
     cursor: QueryCursor,
-    captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8]>>,
+    captures: iter::Peekable<QueryCaptures<'a, 'a, &'a [u8], &'a [u8]>>,
     config: &'a HighlightConfiguration,
     highlight_end_stack: Vec<usize>,
     scope_stack: Vec<LocalScope<'a>>,
@@ -113,9 +179,16 @@ struct HighlightIterLayer<'a> {
     depth: usize,
 }
 
+impl Default for Highlighter {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl Highlighter {
+    #[must_use]
     pub fn new() -> Self {
-        Highlighter {
+        Self {
            parser: Parser::new(),
            cursors: Vec::new(),
        }
@@ -135,6 +208,7 @@ impl Highlighter {
     ) -> Result<impl Iterator<Item = Result<HighlightEvent, Error>> + 'a, Error> {
         let layers = HighlightIterLayer::new(
             source,
+            None,
             self,
             cancellation_flag,
             &mut injection_callback,
@@ -150,12 +224,13 @@
         assert_ne!(layers.len(), 0);
         let mut result = HighlightIter {
             source,
+            language_name: &config.language_name,
             byte_offset: 0,
             injection_callback,
             cancellation_flag,
             highlighter: self,
             iter_count: 0,
-            layers: layers,
+            layers,
             next_event: None,
             last_highlight_range: None,
         };
@@ -173,14 +248,15 @@ impl HighlightConfiguration {
     /// * `language` - The Tree-sitter `Language` that should be used for parsing.
     /// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
     ///   should be non-empty, otherwise no syntax highlights will be added.
-    /// * `injections_query` - A string containing tree patterns for injecting other languages
-    ///   into the document. This can be empty if no injections are desired.
-    /// * `locals_query` - A string containing tree patterns for tracking local variable
-    ///   definitions and references. This can be empty if local variable tracking is not needed.
+    /// * `injections_query` - A string containing tree patterns for injecting other languages into
+    ///   the document. This can be empty if no injections are desired.
+    /// * `locals_query` - A string containing tree patterns for tracking local variable definitions
+    ///   and references. This can be empty if local variable tracking is not needed.
     ///
     /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
     pub fn new(
         language: Language,
+        name: impl Into<String>,
         highlights_query: &str,
         injection_query: &str,
         locals_query: &str,
@@ -195,7 +271,7 @@ impl HighlightConfiguration {
 
         // Construct a single query by concatenating the three query strings, but record the
         // range of pattern indices that belong to each individual string.
-        let mut query = Query::new(language, &query_source)?;
+        let mut query = Query::new(&language, &query_source)?;
         let mut locals_pattern_index = 0;
         let mut highlights_pattern_index = 0;
         for i in 0..(query.pattern_count()) {
@@ -212,7 +288,7 @@
 
         // Construct a separate query just for dealing with the 'combined injections'.
         // Disable the combined injection patterns in the main query.
-        let mut combined_injections_query = Query::new(language, injection_query)?;
+        let mut combined_injections_query = Query::new(&language, injection_query)?;
         let mut has_combined_queries = false;
         for pattern_index in 0..locals_pattern_index {
             let settings = query.property_settings(pattern_index);
@@ -249,7 +325,7 @@
         let mut local_scope_capture_index = None;
         for (i, name) in query.capture_names().iter().enumerate() {
             let i = Some(i as u32);
-            match name.as_str() {
+            match *name {
                 "injection.content" => injection_content_capture_index = i,
                 "injection.language" => injection_language_capture_index = i,
                 "local.definition" => local_def_capture_index = i,
@@ -261,8 +337,9 @@
         }
 
         let highlight_indices = vec![None; query.capture_names().len()];
-        Ok(HighlightConfiguration {
+        Ok(Self {
             language,
+            language_name: name.into(),
             query,
             combined_injections_query,
             locals_pattern_index,
@@ -279,7 +356,8 @@
     }
 
     /// Get a slice containing all of the highlight names used in the configuration.
-    pub fn names(&self) -> &[String] {
+    #[must_use]
+    pub const fn names(&self) -> &[&str] {
         self.query.capture_names()
     }
 
@@ -303,7 +381,7 @@
         let mut best_index = None;
         let mut best_match_len = 0;
-        for (i, recognized_name) in recognized_names.into_iter().enumerate() {
+        for (i, recognized_name) in recognized_names.iter().enumerate() {
             let mut len = 0;
             let mut matches = true;
             for part in recognized_name.as_ref().split('.') {
@@ -321,16 +399,35 @@
             best_index.map(Highlight)
         }));
     }
+
+    // Return the list of this configuration's capture names that are neither present in the
+    // list of predefined 'canonical' names nor start with an underscore (denoting 'private'
+    // captures used as part of capture internals).
+    #[must_use]
+    pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
+        let capture_names = if capture_names.is_empty() {
+            &*STANDARD_CAPTURE_NAMES
+        } else {
+            capture_names
+        };
+
+        self.names()
+            .iter()
+            .filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
+            .copied()
+            .collect()
+    }
 }
 
 impl<'a> HighlightIterLayer<'a> {
     /// Create a new 'layer' of highlighting for this document.
     ///
-    /// In the even that the new layer contains "combined injections" (injections where multiple
+    /// In the event that the new layer contains "combined injections" (injections where multiple
     /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
     /// added to the returned vector.
+    #[allow(clippy::too_many_arguments)]
     fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
         source: &'a [u8],
+        parent_name: Option<&str>,
         highlighter: &mut Highlighter,
         cancellation_flag: Option<&'a AtomicUsize>,
         injection_callback: &mut F,
@@ -344,7 +441,7 @@ impl<'a> HighlightIterLayer<'a> {
             if highlighter.parser.set_included_ranges(&ranges).is_ok() {
                 highlighter
                     .parser
-                    .set_language(config.language)
+                    .set_language(&config.language)
                     .map_err(|_| Error::InvalidLanguage)?;
 
                 unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) };
@@ -353,7 +450,7 @@
                     .parse(source, None)
                     .ok_or(Error::Cancelled)?;
                 unsafe { highlighter.parser.set_cancellation_flag(None) };
-                let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new());
+                let mut cursor = highlighter.cursors.pop().unwrap_or_default();
 
                 // Process combined injections.
                 if let Some(combined_injections_query) = &config.combined_injections_query {
@@ -363,8 +460,13 @@
                         cursor.matches(combined_injections_query, tree.root_node(), source);
                     for mat in matches {
                         let entry = &mut injections_by_pattern_index[mat.pattern_index];
-                        let (language_name, content_node, include_children) =
-                            injection_for_match(config, combined_injections_query, &mat, source);
+                        let (language_name, content_node, include_children) = injection_for_match(
+                            config,
+                            parent_name,
+                            combined_injections_query,
+                            &mat,
+                            source,
+                        );
                         if language_name.is_some() {
                             entry.0 = language_name;
                         }
@@ -418,12 +520,12 @@
 
             if queue.is_empty() {
                 break;
-            } else {
-                let (next_config, next_depth, next_ranges) = queue.remove(0);
-                config = next_config;
-                depth = next_depth;
-                ranges = next_ranges;
             }
+
+            let (next_config, next_depth, next_ranges) = queue.remove(0);
+            config = next_config;
+            depth = next_depth;
+            ranges = next_ranges;
         }
 
         Ok(result)
@@ -432,12 +534,12 @@
 
     // Compute the ranges that should be included when parsing an injection.
     // This takes into account three things:
     // * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
-    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges
-    //   are the ranges of those nodes.
-    // * `includes_children` - For some injections, the content nodes' children should be
-    //   excluded from the nested document, so that only the content nodes' *own* content
-    //   is reparsed. For other injections, the content nodes' entire ranges should be
-    //   reparsed, including the ranges of their children.
+    // * `nodes` - Every injection takes place within a set of nodes. The injection ranges are the
+    //   ranges of those nodes.
+    // * `includes_children` - For some injections, the content nodes' children should be excluded
+    //   from the nested document, so that only the content nodes' *own* content is reparsed. For
+    //   other injections, the content nodes' entire ranges should be reparsed, including the ranges
+    //   of their children.
fn intersect_ranges( parent_ranges: &[Range], nodes: &[Node], @@ -449,7 +551,7 @@ impl<'a> HighlightIterLayer<'a> { let mut parent_range = parent_range_iter .next() .expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { + for node in nodes { let mut preceding_range = Range { start_byte: 0, start_point: Point::new(0, 0), @@ -472,7 +574,7 @@ impl<'a> HighlightIterLayer<'a> { Some(child.range()) } }) - .chain([following_range].iter().cloned()) + .chain(std::iter::once(following_range)) { let mut range = Range { start_byte: preceding_range.end_byte, @@ -532,7 +634,7 @@ impl<'a> HighlightIterLayer<'a> { .captures .peek() .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); + let next_end = self.highlight_end_stack.last().copied(); match (next_start, next_end) { (Some(start), Some(end)) => { if start < end { @@ -589,10 +691,9 @@ where self.layers[0..(i + 1)].rotate_left(1); } break; - } else { - let layer = self.layers.remove(0); - self.highlighter.cursors.push(layer.cursor); } + let layer = self.layers.remove(0); + self.highlighter.cursors.push(layer.cursor); } } @@ -664,7 +765,7 @@ where // If any previous highlight ends before this node starts, then before // processing this capture, emit the source code up until the end of the // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if let Some(end_byte) = layer.highlight_end_stack.last().copied() { if end_byte <= range.start { layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); @@ -673,20 +774,26 @@ where } // If there are no more captures, then emit any remaining highlight end events. // And if there are none of those, then just advance to the end of the document. - else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { + else { + if let Some(end_byte) = layer.highlight_end_stack.last().copied() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } return self.emit_event(self.source.len(), None); - }; + } let (mut match_, capture_index) = layer.captures.next().unwrap(); let mut capture = match_.captures[capture_index]; // If this capture represents an injection, then process the injection. if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); + let (language_name, content_node, include_children) = injection_for_match( + layer.config, + Some(self.language_name), + &layer.config.query, + &match_, + self.source, + ); // Explicitly remove this match so that none of its other captures will remain // in the stream of captures. 
@@ -704,6 +811,7 @@ where if !ranges.is_empty() { match HighlightIterLayer::new( self.source, + Some(self.language_name), self.highlighter, self.cancellation_flag, &mut self.injection_callback, @@ -746,12 +854,9 @@ where local_defs: Vec::new(), }; for prop in layer.config.query.property_settings(match_.pattern_index) { - match prop.key.as_ref() { - "local.scope-inherits" => { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - _ => {} + if prop.key.as_ref() == "local.scope-inherits" { + scope.inherits = + prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); } } layer.scope_stack.push(scope); @@ -782,26 +887,24 @@ where } // If the node represents a reference, then try to find the corresponding // definition in the scope stack. - else if Some(capture.index) == layer.config.local_ref_capture_index { - if definition_highlight.is_none() { - definition_highlight = None; - if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = - scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) - { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; + else if Some(capture.index) == layer.config.local_ref_capture_index + && definition_highlight.is_none() + { + definition_highlight = None; + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { + if def.name == name && range.start >= def.value_range.end { + Some(def.highlight) + } else { + None } + }) { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; } } } @@ -831,34 +934,26 @@ where } } - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = layer.captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } - - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. Captures - // for a given node are ordered by pattern index, so these subsequent + // Once a highlighting pattern is found for the current node, keep iterating over + // any later highlighting patterns that also match this node and set the match to it. + // Captures for a given node are ordered by pattern index, so these subsequent // captures are guaranteed to be for highlighting, not injections or // local variables. while let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { - layer.captures.next(); + let following_match = layer.captures.next().unwrap().0; + // If the current node was found to be a local variable, then ignore + // the following match if it's a highlighting pattern that is disabled + // for local variables. 
+                    if (definition_highlight.is_some() || reference_highlight.is_some())
+                        && layer.config.non_local_variable_patterns[following_match.pattern_index]
+                    {
+                        continue;
+                    }
+
+                    match_.remove();
+                    capture = next_capture;
+                    match_ = following_match;
                 } else {
                     break;
                 }
@@ -885,9 +980,16 @@
         }
     }
 
+impl Default for HtmlRenderer {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl HtmlRenderer {
+    #[must_use]
     pub fn new() -> Self {
-        let mut result = HtmlRenderer {
+        let mut result = Self {
             html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
             line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
             carriage_return_highlight: None,
@@ -987,10 +1089,21 @@
         self.html.extend(b"</span>");
     }
 
-    fn add_text<'a, F>(&mut self, src: &[u8], highlights: &Vec<Highlight>, attribute_callback: &F)
+    fn add_text<'a, F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
     where
         F: Fn(Highlight) -> &'a [u8],
     {
+        pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
+            match c as char {
+                '>' => Some(b"&gt;"),
+                '<' => Some(b"&lt;"),
+                '&' => Some(b"&amp;"),
+                '\'' => Some(b"&#39;"),
+                '"' => Some(b"&quot;"),
+                _ => None,
+            }
+        }
+
         let mut last_char_was_cr = false;
         for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
             // Don't render carriage return characters, but allow lone carriage returns (not
@@ -1014,7 +1127,7 @@
                 highlights
                     .iter()
                     .for_each(|scope| self.start_highlight(*scope, attribute_callback));
-            } else if let Some(escape) = util::html_escape(c) {
+            } else if let Some(escape) = html_escape(c) {
                 self.html.extend_from_slice(escape);
             } else {
                 self.html.push(c);
@@ -1024,7 +1137,8 @@
 }
 
 fn injection_for_match<'a>(
-    config: &HighlightConfiguration,
+    config: &'a HighlightConfiguration,
+    parent_name: Option<&'a str>,
     query: &'a Query,
     query_match: &QueryMatch<'a, 'a>,
     source: &'a [u8],
@@ -1034,6 +1148,7 @@
 
     let mut language_name = None;
     let mut content_node = None;
+
     for capture in query_match.captures {
         let index = Some(capture.index);
         if index == language_capture_index {
@@ -1051,7 +1166,25 @@
                     // that sets the injection.language key.
                     "injection.language" => {
                         if language_name.is_none() {
-                            language_name = prop.value.as_ref().map(|s| s.as_ref())
+                            language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
                         }
                     }
 
+                    // Setting the `injection.self` key can be used to specify that the
+                    // language name should be the same as the language of the current
+                    // layer.
+ "injection.self" => { + if language_name.is_none() { + language_name = Some(config.language_name.as_str()); + } + } + + // Setting the `injection.parent` key can be used to specify that + // the language name should be the same as the language of the + // parent layer + "injection.parent" => { + if language_name.is_none() { + language_name = parent_name; } } diff --git a/highlight/src/util.rs b/highlight/src/util.rs deleted file mode 100644 index 29adb13..0000000 --- a/highlight/src/util.rs +++ /dev/null @@ -1,10 +0,0 @@ -pub fn html_escape(c: u8) -> Option<&'static [u8]> { - match c as char { - '>' => Some(b">"), - '<' => Some(b"<"), - '&' => Some(b"&"), - '\'' => Some(b"'"), - '"' => Some(b"""), - _ => None, - } -} diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 39e0791..4d9e8b9 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,33 +1,45 @@ [package] name = "tree-sitter" +version.workspace = true description = "Rust bindings to the Tree-sitter parsing library" -version = "0.20.10" -authors = ["Max Brunsfeld "] -edition = "2021" -license = "MIT" +authors.workspace = true +edition.workspace = true +rust-version = "1.65" readme = "binding_rust/README.md" -keywords = ["incremental", "parsing"] +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true categories = ["api-bindings", "parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter" -rust-version.workspace = true build = "binding_rust/build.rs" +links = "tree-sitter" include = [ "/binding_rust/*", "/Cargo.toml", - "/include/*", "/src/*.h", "/src/*.c", "/src/unicode/*", + "/src/wasm/*", + "/include/tree_sitter/api.h", ] +[features] +wasm = ["wasmtime-c-api"] + [dependencies] -lazy_static = { version = "1.2.0", optional = true } -regex = "1" +regex.workspace = true + +[dependencies.wasmtime-c-api] +version = "19" +optional = true +package = "wasmtime-c-api-impl" +default-features = false [build-dependencies] -cc = "^1.0.58" +bindgen = { version = "0.69.4", optional = true } +cc.workspace = true [lib] path = "binding_rust/lib.rs" diff --git a/lib/README.md b/lib/README.md index 82ebc5a..231fe2a 100644 --- a/lib/README.md +++ b/lib/README.md @@ -1,5 +1,4 @@ -Subdirectories --------------- +## Subdirectories * [`src`](./src) - C source code for the Tree-sitter library * [`include`](./include) - C headers for the Tree-sitter library diff --git a/lib/binding_rust/README.md b/lib/binding_rust/README.md index dffe766..2dbe907 100644 --- a/lib/binding_rust/README.md +++ b/lib/binding_rust/README.md @@ -1,35 +1,22 @@ # Rust Tree-sitter -[![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) +[![crates.io badge]][crates.io] + +[crates.io]: https://crates.io/crates/tree-sitter +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter.svg?color=%23B48723 Rust bindings to the [Tree-sitter][] parsing library. -### Basic Usage +## Basic Usage First, create a parser: ```rust -use tree_sitter::{Parser, Language}; +use tree_sitter::{InputEdit, Language, Parser, Point}; let mut parser = Parser::new(); ``` -Tree-sitter languages consist of generated C code. 
To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory):
-
-```rust
-use std::path::PathBuf;
-
-fn main() {
-    let dir: PathBuf = ["tree-sitter-javascript", "src"].iter().collect();
-
-    cc::Build::new()
-        .include(&dir)
-        .file(dir.join("parser.c"))
-        .file(dir.join("scanner.c"))
-        .compile("tree-sitter-javascript");
-}
-```
-
 Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
 
 ```toml
@@ -37,22 +24,25 @@ Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`:
 cc="*"
 ```
 
-To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser.
+Then, add a language as a dependency:
 
-```rust
-extern "C" { fn tree_sitter_c() -> Language; }
-extern "C" { fn tree_sitter_rust() -> Language; }
-extern "C" { fn tree_sitter_javascript() -> Language; }
+```toml
+[dependencies]
+tree-sitter = "0.22"
+tree-sitter-rust = "0.21"
+```
+
+To then use a language, you assign it to the parser.
 
-let language = unsafe { tree_sitter_rust() };
-parser.set_language(language).unwrap();
+```rust
+parser.set_language(&tree_sitter_rust::language()).expect("Error loading Rust grammar");
 ```
 
 Now you can parse source code:
 
 ```rust
 let source_code = "fn test() {}";
-let tree = parser.parse(source_code, None).unwrap();
+let mut tree = parser.parse(source_code, None).unwrap();
 let root_node = tree.root_node();
 
 assert_eq!(root_node.kind(), "source_file");
@@ -62,12 +52,13 @@ assert_eq!(root_node.end_position().column, 12);
 
 ### Editing
 
-Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly:
+Once you have a syntax tree, you can update it when your source code changes.
+Passing in the previous edited tree makes `parse` run much more quickly:
 
 ```rust
-let new_source_code = "fn test(a: u32) {}"
+let new_source_code = "fn test(a: u32) {}";
 
-tree.edit(InputEdit {
+tree.edit(&InputEdit {
   start_byte: 8,
  old_end_byte: 8,
  new_end_byte: 14,
@@ -81,7 +72,8 @@
 let new_tree = parser.parse(new_source_code, Some(&tree));
 
 ### Text Input
 
-The source code to parse can be provided either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16:
+The source code to parse can be provided either as a string, a slice, a vector,
+or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16:
 
 ```rust
 // Store some source code in an array of lines.
 let lines = &[
     "pub fn foo() {",
     "  1",
     "}",
 ];
 
 // Parse the source code using a custom callback. The callback is called
 // with both a byte offset and a row/column offset.
-let tree = parser.parse_with(&mut |_byte: u32, position: Point| -> &[u8] {
+let tree = parser.parse_with(&mut |_byte: usize, position: Point| -> &[u8] {
     let row = position.row as usize;
     let column = position.column as usize;
     if row < lines.len() {
         if column < lines[row].as_bytes().len() {
             &lines[row].as_bytes()[column..]
} else { - "\n".as_bytes() + b"\n" } } else { &[] diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index be117f8..8520198 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -1,38 +1,46 @@ -/* automatically generated by rust-bindgen 0.59.2 */ +/* automatically generated by rust-bindgen 0.69.4 */ +pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13; +pub type TSStateId = u16; pub type TSSymbol = u16; pub type TSFieldId = u16; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSLanguage { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSParser { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSTree { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQuery { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryCursor { _unused: [u8; 0], } -pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; -pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +#[repr(C)] +#[derive(Debug)] +pub struct TSLookaheadIterator { + _unused: [u8; 0], +} +pub const TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = ::std::os::raw::c_uint; -pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; -pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; -pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub const TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolTypeAuxiliary: TSSymbolType = 2; pub type TSSymbolType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -49,7 +57,7 @@ pub struct TSRange { pub end_byte: u32, } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSInput { pub payload: *mut ::std::os::raw::c_void, pub read: ::std::option::Option< @@ -62,18 +70,18 @@ pub struct TSInput { >, pub encoding: TSInputEncoding, } -pub const TSLogType_TSLogTypeParse: TSLogType = 0; -pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub const TSLogTypeParse: TSLogType = 0; +pub const TSLogTypeLex: TSLogType = 1; pub type TSLogType = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSLogger { pub payload: *mut ::std::os::raw::c_void, pub log: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, - arg1: TSLogType, - arg2: *const ::std::os::raw::c_char, + log_type: TSLogType, + buffer: *const ::std::os::raw::c_char, ), >, } @@ -99,45 +107,45 @@ pub struct TSNode { pub struct TSTreeCursor { pub tree: *const ::std::os::raw::c_void, pub id: *const ::std::os::raw::c_void, - pub context: [u32; 2usize], + pub context: [u32; 3usize], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } -pub const TSQuantifier_TSQuantifierZero: TSQuantifier = 0; -pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; -pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; -pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; -pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; +pub const TSQuantifierZero: TSQuantifier = 0; +pub const TSQuantifierZeroOrOne: TSQuantifier = 1; +pub const TSQuantifierZeroOrMore: TSQuantifier = 2; +pub 
const TSQuantifierOne: TSQuantifier = 3; +pub const TSQuantifierOneOrMore: TSQuantifier = 4; pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryMatch { pub id: u32, pub pattern_index: u16, pub capture_count: u16, pub captures: *const TSQueryCapture, } -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; +pub const TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; +pub const TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; +pub const TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; pub type TSQueryPredicateStepType = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryPredicateStep { pub type_: TSQueryPredicateStepType, pub value_id: u32, } -pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0; -pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; -pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; -pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; -pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; -pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; -pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; +pub const TSQueryErrorNone: TSQueryError = 0; +pub const TSQueryErrorSyntax: TSQueryError = 1; +pub const TSQueryErrorNodeType: TSQueryError = 2; +pub const TSQueryErrorField: TSQueryError = 3; +pub const TSQueryErrorCapture: TSQueryError = 4; +pub const TSQueryErrorStructure: TSQueryError = 5; +pub const TSQueryErrorLanguage: TSQueryError = 6; pub type TSQueryError = ::std::os::raw::c_uint; extern "C" { #[doc = " Create a new parser."] @@ -145,94 +153,30 @@ extern "C" { } extern "C" { #[doc = " Delete the parser, freeing all of the memory that it used."] - pub fn ts_parser_delete(parser: *mut TSParser); -} -extern "C" { - #[doc = " Set the language that the parser should use for parsing."] - #[doc = ""] - #[doc = " Returns a boolean indicating whether or not the language was successfully"] - #[doc = " assigned. True means assignment succeeded. False means there was a version"] - #[doc = " mismatch: the language was generated with an incompatible version of the"] - #[doc = " Tree-sitter CLI. Check the language's version using `ts_language_version`"] - #[doc = " and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and"] - #[doc = " `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] - pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; + pub fn ts_parser_delete(self_: *mut TSParser); } extern "C" { #[doc = " Get the parser's current language."] pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { - #[doc = " Set the ranges of text that the parser should include when parsing."] - #[doc = ""] - #[doc = " By default, the parser will always include entire documents. This function"] - #[doc = " allows you to parse only a *portion* of a document but still return a syntax"] - #[doc = " tree whose ranges match up with the document as a whole. You can also pass"] - #[doc = " multiple disjoint ranges."] - #[doc = ""] - #[doc = " The second and third parameters specify the location and length of an array"] - #[doc = " of ranges. 
The parser does *not* take ownership of these ranges; it copies"] - #[doc = " the data, so it doesn't matter how these ranges are allocated."] - #[doc = ""] - #[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"] - #[doc = " the given ranges must be ordered from earliest to latest in the document,"] - #[doc = " and they must not overlap. That is, the following must hold for all"] - #[doc = " `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte"] - #[doc = ""] - #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] - #[doc = " will not be assigned, and this function will return `false`. On success,"] - #[doc = " this function returns `true`"] + #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] + pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; +} +extern "C" { + #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `count` is zero, then the entire document will be parsed. Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. That is, the following must hold for all:\n\n `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] pub fn ts_parser_set_included_ranges( self_: *mut TSParser, ranges: *const TSRange, - length: u32, + count: u32, ) -> bool; } extern "C" { - #[doc = " Get the ranges of text that the parser will include when parsing."] - #[doc = ""] - #[doc = " The returned pointer is owned by the parser. The caller should not free it"] - #[doc = " or write to it. The length of the array will be written to the given"] - #[doc = " `length` pointer."] - pub fn ts_parser_included_ranges(self_: *const TSParser, length: *mut u32) -> *const TSRange; -} -extern "C" { - #[doc = " Use the parser to parse some source code and create a syntax tree."] - #[doc = ""] - #[doc = " If you are parsing this document for the first time, pass `NULL` for the"] - #[doc = " `old_tree` parameter. Otherwise, if you have already parsed an earlier"] - #[doc = " version of this document and the document has since been edited, pass the"] - #[doc = " previous syntax tree so that the unchanged parts of it can be reused."] - #[doc = " This will save time and memory. 
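A minimal sketch of the same included-ranges contract through the safe Rust crate (hedged: assumes the `tree-sitter` 0.22 API; the byte span and the single-line point math are hypothetical simplifications):

```rust
use tree_sitter::{Parser, Point, Range};

// Parse only one sub-span of `source`; passing an empty slice of ranges
// would restore whole-document parsing. `set_included_ranges` returns an
// error for unordered or overlapping ranges, mirroring the `false`
// return described in the C doc comment.
fn parse_span_only(
    parser: &mut Parser,
    source: &str,
    start: usize,
    end: usize,
) -> Option<tree_sitter::Tree> {
    parser
        .set_included_ranges(&[Range {
            start_byte: start,
            end_byte: end,
            // Points computed assuming the span lies on the first line.
            start_point: Point::new(0, start),
            end_point: Point::new(0, end),
        }])
        .ok()?;
    parser.parse(source, None)
}
```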
For this to work correctly, you must have"] - #[doc = " already edited the old syntax tree using the `ts_tree_edit` function in a"] - #[doc = " way that exactly matches the source code changes."] - #[doc = ""] - #[doc = " The `TSInput` parameter lets you specify how to read the text. It has the"] - #[doc = " following three fields:"] - #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] - #[doc = " and (row, column) position. The function should return a pointer to the"] - #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] - #[doc = " not take ownership of this buffer; it just borrows it until it has"] - #[doc = " finished reading it. The function should write a zero value to the"] - #[doc = " `bytes_read` pointer to indicate the end of the document."] - #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] - #[doc = " of the `read` function."] - #[doc = " 3. `encoding`: An indication of how the text is encoded. Either"] - #[doc = " `TSInputEncodingUTF8` or `TSInputEncodingUTF16`."] - #[doc = ""] - #[doc = " This function returns a syntax tree on success, and `NULL` on failure. There"] - #[doc = " are three possible reasons for failure:"] - #[doc = " 1. The parser does not have a language assigned. Check for this using the"] - #[doc = "`ts_parser_language` function."] - #[doc = " 2. Parsing was cancelled due to a timeout that was set by an earlier call to"] - #[doc = " the `ts_parser_set_timeout_micros` function. You can resume parsing from"] - #[doc = " where the parser left out by calling `ts_parser_parse` again with the"] - #[doc = " same arguments. Or you can start parsing from scratch by first calling"] - #[doc = " `ts_parser_reset`."] - #[doc = " 3. Parsing was cancelled using a cancellation flag that was set by an"] - #[doc = " earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing"] - #[doc = " from where the parser left out by calling `ts_parser_parse` again with"] - #[doc = " the same arguments."] + #[doc = " Get the ranges of text that the parser will include when parsing.\n\n The returned pointer is owned by the parser. The caller should not free it\n or write to it. The length of the array will be written to the given\n `count` pointer."] + pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange; +} +extern "C" { + #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. 
[`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. You can resume parsing from\n where the parser left off by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left off by calling [`ts_parser_parse`] again with\n the same arguments.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, @@ -240,10 +184,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer."] - #[doc = " The first two parameters are the same as in the `ts_parser_parse` function"] - #[doc = " above. The second two parameters indicate the location of the buffer and its"] - #[doc = " length in bytes."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."] pub fn ts_parser_parse_string( self_: *mut TSParser, old_tree: *const TSTree, @@ -252,10 +193,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer with"] - #[doc = " a given encoding. The first four parameters work the same as in the"] - #[doc = " `ts_parser_parse_string` method above. The final parameter indicates whether"] - #[doc = " the text is encoded as UTF8 or UTF16."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n [`ts_parser_parse_string`] method above. The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] pub fn ts_parser_parse_string_encoding( self_: *mut TSParser, old_tree: *const TSTree, @@ -265,33 +203,19 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Instruct the parser to start the next parse from the beginning."] - #[doc = ""] - #[doc = " If the parser previously failed because of a timeout or a cancellation, then"] - #[doc = " by default, it will resume where it left off on the next call to"] - #[doc = " `ts_parser_parse` or other parsing functions. If you don't want to resume,"] - #[doc = " and instead intend to use this parser to parse some other document, you must"] - #[doc = " call `ts_parser_reset` first."] + #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of a timeout or a cancellation, then\n by default, it will resume where it left off on the next call to\n [`ts_parser_parse`] or other parsing functions. 
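A sketch of the timeout-and-reset behavior described above, via the Rust crate (hedged: assumes `Parser::set_timeout_micros` and `Parser::reset` from tree-sitter 0.22; the 5 ms budget is an arbitrary example):

```rust
use tree_sitter::Parser;

fn parse_with_budget(parser: &mut Parser, text: &str) -> Option<tree_sitter::Tree> {
    // Halt parsing after roughly 5ms; `parse` then returns `None`,
    // mirroring the NULL return described in the C doc comment.
    parser.set_timeout_micros(5_000);
    match parser.parse(text, None) {
        Some(tree) => Some(tree),
        None => {
            // To reuse this parser on a different document instead of
            // resuming, reset it first, as the doc comment requires.
            parser.reset();
            None
        }
    }
}
```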
If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call [`ts_parser_reset`] first."] pub fn ts_parser_reset(self_: *mut TSParser); } extern "C" { - #[doc = " Set the maximum duration in microseconds that parsing should be allowed to"] - #[doc = " take before halting."] - #[doc = ""] - #[doc = " If parsing takes longer than this, it will halt early, returning NULL."] - #[doc = " See `ts_parser_parse` for more information."] - pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout: u64); + #[doc = " Set the maximum duration in microseconds that parsing should be allowed to\n take before halting.\n\n If parsing takes longer than this, it will halt early, returning NULL.\n See [`ts_parser_parse`] for more information."] + pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout_micros: u64); } extern "C" { #[doc = " Get the duration in microseconds that parsing is allowed to take."] pub fn ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { - #[doc = " Set the parser's current cancellation flag pointer."] - #[doc = ""] - #[doc = " If a non-null pointer is assigned, then the parser will periodically read"] - #[doc = " from this pointer during parsing. If it reads a non-zero value, it will"] - #[doc = " halt early, returning NULL. See `ts_parser_parse` for more information."] + #[doc = " Set the parser's current cancellation flag pointer.\n\n If a non-null pointer is assigned, then the parser will periodically read\n from this pointer during parsing. If it reads a non-zero value, it will\n halt early, returning NULL. See [`ts_parser_parse`] for more information."] pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { @@ -299,11 +223,7 @@ extern "C" { pub fn ts_parser_cancellation_flag(self_: *const TSParser) -> *const usize; } extern "C" { - #[doc = " Set the logger that a parser should use during parsing."] - #[doc = ""] - #[doc = " The parser does not take ownership over the logger payload. If a logger was"] - #[doc = " previously assigned, the caller is responsible for releasing any memory"] - #[doc = " owned by the previous logger."] + #[doc = " Set the logger that a parser should use during parsing.\n\n The parser does not take ownership over the logger payload. If a logger was\n previously assigned, the caller is responsible for releasing any memory\n owned by the previous logger."] pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); } extern "C" { @@ -311,17 +231,11 @@ extern "C" { pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; } extern "C" { - #[doc = " Set the file descriptor to which the parser should write debugging graphs"] - #[doc = " during parsing. The graphs are formatted in the DOT language. You may want"] - #[doc = " to pipe these graphs directly to a `dot(1)` process in order to generate"] - #[doc = " SVG output. You can turn off this logging by passing a negative number."] - pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, file: ::std::os::raw::c_int); + #[doc = " Set the file descriptor to which the parser should write debugging graphs\n during parsing. The graphs are formatted in the DOT language. You may want\n to pipe these graphs directly to a `dot(1)` process in order to generate\n SVG output. 
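A sketch of attaching the logger described above through the Rust crate (hedged: assumes the `Parser::set_logger` signature and `LogType` enum from tree-sitter 0.22):

```rust
use tree_sitter::{LogType, Parser};

fn attach_logger(parser: &mut Parser) {
    // Forward the parser's internal log messages to stderr, tagging each
    // line with whether it came from the parser or the lexer.
    parser.set_logger(Some(Box::new(|log_type: LogType, message: &str| {
        let tag = match log_type {
            LogType::Parse => "parse",
            LogType::Lex => "lex",
        };
        eprintln!("[{tag}] {message}");
    })));
}
```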
You can turn off this logging by passing a negative number."] + pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, fd: ::std::os::raw::c_int); } extern "C" { - #[doc = " Create a shallow copy of the syntax tree. This is very fast."] - #[doc = ""] - #[doc = " You need to copy a syntax tree in order to use it on more than one thread at"] - #[doc = " a time, as syntax trees are not thread safe."] + #[doc = " Create a shallow copy of the syntax tree. This is very fast.\n\n You need to copy a syntax tree in order to use it on more than one thread at\n a time, as syntax trees are not thread safe."] pub fn ts_tree_copy(self_: *const TSTree) -> *mut TSTree; } extern "C" { @@ -333,45 +247,27 @@ extern "C" { pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; } extern "C" { - #[doc = " Get the root node of the syntax tree, but with its position"] - #[doc = " shifted forward by the given offset."] + #[doc = " Get the root node of the syntax tree, but with its position\n shifted forward by the given offset."] pub fn ts_tree_root_node_with_offset( self_: *const TSTree, offset_bytes: u32, - offset_point: TSPoint, + offset_extent: TSPoint, ) -> TSNode; } extern "C" { #[doc = " Get the language that was used to parse the syntax tree."] - pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; + pub fn ts_tree_language(self_: *const TSTree) -> *const TSLanguage; } extern "C" { - #[doc = " Get the array of included ranges that was used to parse the syntax tree."] - #[doc = ""] - #[doc = " The returned pointer must be freed by the caller."] - pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange; + #[doc = " Get the array of included ranges that was used to parse the syntax tree.\n\n The returned pointer must be freed by the caller."] + pub fn ts_tree_included_ranges(self_: *const TSTree, length: *mut u32) -> *mut TSRange; } extern "C" { - #[doc = " Edit the syntax tree to keep it in sync with source code that has been"] - #[doc = " edited."] - #[doc = ""] - #[doc = " You must describe the edit both in terms of byte offsets and in terms of"] - #[doc = " (row, column) coordinates."] + #[doc = " Edit the syntax tree to keep it in sync with source code that has been\n edited.\n\n You must describe the edit both in terms of byte offsets and in terms of\n (row, column) coordinates."] pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); } extern "C" { - #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same"] - #[doc = " document, returning an array of ranges whose syntactic structure has changed."] - #[doc = ""] - #[doc = " For this to work correctly, the old syntax tree must have been edited such"] - #[doc = " that its ranges match up to the new tree. Generally, you'll want to call"] - #[doc = " this function right after calling one of the `ts_parser_parse` functions."] - #[doc = " You need to pass the old tree that was passed to parse, as well as the new"] - #[doc = " tree that was returned from that function."] - #[doc = ""] - #[doc = " The returned array is allocated using `malloc` and the caller is responsible"] - #[doc = " for freeing it using `free`. 
The length of the array will be written to the"] - #[doc = " given `length` pointer."] + #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. Generally, you'll want to call\n this function right after calling one of the [`ts_parser_parse`] functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. The length of the array will be written to the\n given `length` pointer."] pub fn ts_tree_get_changed_ranges( old_tree: *const TSTree, new_tree: *const TSTree, @@ -380,253 +276,263 @@ extern "C" { } extern "C" { #[doc = " Write a DOT graph describing the syntax tree to the given file."] - pub fn ts_tree_print_dot_graph(arg1: *const TSTree, file_descriptor: ::std::os::raw::c_int); + pub fn ts_tree_print_dot_graph(self_: *const TSTree, file_descriptor: ::std::os::raw::c_int); } extern "C" { #[doc = " Get the node's type as a null-terminated string."] - pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; + pub fn ts_node_type(self_: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's type as a numerical id."] - pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; + pub fn ts_node_symbol(self_: TSNode) -> TSSymbol; +} +extern "C" { + #[doc = " Get the node's language."] + pub fn ts_node_language(self_: TSNode) -> *const TSLanguage; +} +extern "C" { + #[doc = " Get the node's type as it appears in the grammar ignoring aliases as a\n null-terminated string."] + pub fn ts_node_grammar_type(self_: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. This should be used in [`ts_language_next_state`] instead of\n [`ts_node_symbol`]."] + pub fn ts_node_grammar_symbol(self_: TSNode) -> TSSymbol; } extern "C" { #[doc = " Get the node's start byte."] - pub fn ts_node_start_byte(arg1: TSNode) -> u32; + pub fn ts_node_start_byte(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's start position in terms of rows and columns."] - pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; + pub fn ts_node_start_point(self_: TSNode) -> TSPoint; } extern "C" { #[doc = " Get the node's end byte."] - pub fn ts_node_end_byte(arg1: TSNode) -> u32; + pub fn ts_node_end_byte(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's end position in terms of rows and columns."] - pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; + pub fn ts_node_end_point(self_: TSNode) -> TSPoint; } extern "C" { - #[doc = " Get an S-expression representing the node as a string."] - #[doc = ""] - #[doc = " This string is allocated with `malloc` and the caller is responsible for"] - #[doc = " freeing it using `free`."] - pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; + #[doc = " Get an S-expression representing the node as a string.\n\n This string is allocated with `malloc` and the caller is responsible for\n freeing it using `free`."] + pub fn ts_node_string(self_: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = " Check if the node is null. 
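A sketch of the changed-ranges workflow described above, via the Rust crate (hedged: assumes `Tree::edit` and `Tree::changed_ranges` from tree-sitter 0.22; the edit is supplied by the caller):

```rust
use tree_sitter::{InputEdit, Parser, Tree};

// After applying `edit` to both the old tree and the source text,
// reparse and report which byte ranges actually changed structurally.
fn reparse_and_diff(
    parser: &mut Parser,
    old: &mut Tree,
    new_src: &str,
    edit: &InputEdit,
) -> Option<Tree> {
    old.edit(edit);
    let new_tree = parser.parse(new_src, Some(&*old))?;
    for range in old.changed_ranges(&new_tree) {
        eprintln!("changed: bytes {}..{}", range.start_byte, range.end_byte);
    }
    Some(new_tree)
}
```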
Functions like `ts_node_child` and"] - #[doc = " `ts_node_next_sibling` will return a null node to indicate that no such node"] - #[doc = " was found."] - pub fn ts_node_is_null(arg1: TSNode) -> bool; + #[doc = " Check if the node is null. Functions like [`ts_node_child`] and\n [`ts_node_next_sibling`] will return a null node to indicate that no such node\n was found."] + pub fn ts_node_is_null(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the"] - #[doc = " grammar, whereas *anonymous* nodes correspond to string literals in the"] - #[doc = " grammar."] - pub fn ts_node_is_named(arg1: TSNode) -> bool; + #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the\n grammar, whereas *anonymous* nodes correspond to string literals in the\n grammar."] + pub fn ts_node_is_named(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in"] - #[doc = " order to recover from certain kinds of syntax errors."] - pub fn ts_node_is_missing(arg1: TSNode) -> bool; + #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in\n order to recover from certain kinds of syntax errors."] + pub fn ts_node_is_missing(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *extra*. Extra nodes represent things like comments,"] - #[doc = " which are not required the grammar, but can appear anywhere."] - pub fn ts_node_is_extra(arg1: TSNode) -> bool; + #[doc = " Check if the node is *extra*. Extra nodes represent things like comments,\n which are not required by the grammar, but can appear anywhere."] + pub fn ts_node_is_extra(self_: TSNode) -> bool; } extern "C" { #[doc = " Check if a syntax node has been edited."] - pub fn ts_node_has_changes(arg1: TSNode) -> bool; + pub fn ts_node_has_changes(self_: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is a syntax error or contains any syntax errors."] - pub fn ts_node_has_error(arg1: TSNode) -> bool; + pub fn ts_node_has_error(self_: TSNode) -> bool; +} +extern "C" { + #[doc = " Check if the node is a syntax error."] + pub fn ts_node_is_error(self_: TSNode) -> bool; +} +extern "C" { + #[doc = " Get this node's parse state."] + pub fn ts_node_parse_state(self_: TSNode) -> TSStateId; +} +extern "C" { + #[doc = " Get the parse state after this node."] + pub fn ts_node_next_parse_state(self_: TSNode) -> TSStateId; } extern "C" { - #[doc = " Get the node's immediate parent."] - pub fn ts_node_parent(arg1: TSNode) -> TSNode; + #[doc = " Get the node's immediate parent.\n Prefer [`ts_node_child_containing_descendant`] for\n iterating over the node's ancestors."] + pub fn ts_node_parent(self_: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node's child at the given index, where zero represents the first"] - #[doc = " child."] - pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; + #[doc = " Get the node's child that contains `descendant`."] + pub fn ts_node_child_containing_descendant(self_: TSNode, descendant: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the field name for node's child at the given index, where zero represents"] - #[doc = " the first child. 
Returns NULL, if no field is found."] - pub fn ts_node_field_name_for_child(arg1: TSNode, arg2: u32) -> *const ::std::os::raw::c_char; + #[doc = " Get the node's child at the given index, where zero represents the first\n child."] + pub fn ts_node_child(self_: TSNode, child_index: u32) -> TSNode; +} +extern "C" { + #[doc = " Get the field name for node's child at the given index, where zero represents\n the first child. Returns NULL, if no field is found."] + pub fn ts_node_field_name_for_child( + self_: TSNode, + child_index: u32, + ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's number of children."] - pub fn ts_node_child_count(arg1: TSNode) -> u32; + pub fn ts_node_child_count(self_: TSNode) -> u32; } extern "C" { - #[doc = " Get the node's *named* child at the given index."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] - pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; + #[doc = " Get the node's *named* child at the given index.\n\n See also [`ts_node_is_named`]."] + pub fn ts_node_named_child(self_: TSNode, child_index: u32) -> TSNode; } extern "C" { - #[doc = " Get the node's number of *named* children."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] - pub fn ts_node_named_child_count(arg1: TSNode) -> u32; + #[doc = " Get the node's number of *named* children.\n\n See also [`ts_node_is_named`]."] + pub fn ts_node_named_child_count(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's child with the given field name."] pub fn ts_node_child_by_field_name( self_: TSNode, - field_name: *const ::std::os::raw::c_char, - field_name_length: u32, + name: *const ::std::os::raw::c_char, + name_length: u32, ) -> TSNode; } extern "C" { - #[doc = " Get the node's child with the given numerical field id."] - #[doc = ""] - #[doc = " You can convert a field name to an id using the"] - #[doc = " `ts_language_field_id_for_name` function."] - pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; + #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n [`ts_language_field_id_for_name`] function."] + pub fn ts_node_child_by_field_id(self_: TSNode, field_id: TSFieldId) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous sibling."] - pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_next_sibling(self_: TSNode) -> TSNode; } extern "C" { - pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_prev_sibling(self_: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous *named* sibling."] - pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_next_named_sibling(self_: TSNode) -> TSNode; } extern "C" { - pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_prev_named_sibling(self_: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's first child that extends beyond the given byte offset."] - pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; + pub fn ts_node_first_child_for_byte(self_: TSNode, byte: u32) -> TSNode; } extern "C" { #[doc = " Get the node's first named child that extends beyond the given byte offset."] - pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; + pub fn ts_node_first_named_child_for_byte(self_: TSNode, byte: u32) -> TSNode; } extern "C" { - #[doc = " Get the smallest node within this node that spans the given range of bytes"] - #[doc = " or (row, column) 
positions."] - pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; + #[doc = " Get the node's number of descendants, including one for the node itself."] + pub fn ts_node_descendant_count(self_: TSNode) -> u32; } extern "C" { - pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) - -> TSNode; + #[doc = " Get the smallest node within this node that spans the given range of bytes\n or (row, column) positions."] + pub fn ts_node_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; } extern "C" { - #[doc = " Get the smallest named node within this node that spans the given range of"] - #[doc = " bytes or (row, column) positions."] - pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; + pub fn ts_node_descendant_for_point_range( + self_: TSNode, + start: TSPoint, + end: TSPoint, + ) -> TSNode; +} +extern "C" { + #[doc = " Get the smallest named node within this node that spans the given range of\n bytes or (row, column) positions."] + pub fn ts_node_named_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; } extern "C" { pub fn ts_node_named_descendant_for_point_range( - arg1: TSNode, - arg2: TSPoint, - arg3: TSPoint, + self_: TSNode, + start: TSPoint, + end: TSPoint, ) -> TSNode; } extern "C" { - #[doc = " Edit the node to keep it in-sync with source code that has been edited."] - #[doc = ""] - #[doc = " This function is only rarely needed. When you edit a syntax tree with the"] - #[doc = " `ts_tree_edit` function, all of the nodes that you retrieve from the tree"] - #[doc = " afterward will already reflect the edit. You only need to use `ts_node_edit`"] - #[doc = " when you have a `TSNode` instance that you want to keep and continue to use"] - #[doc = " after an edit."] - pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); + #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. When you edit a syntax tree with the\n [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. You only need to use [`ts_node_edit`]\n when you have a [`TSNode`] instance that you want to keep and continue to use\n after an edit."] + pub fn ts_node_edit(self_: *mut TSNode, edit: *const TSInputEdit); } extern "C" { #[doc = " Check if two nodes are identical."] - pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; + pub fn ts_node_eq(self_: TSNode, other: TSNode) -> bool; } extern "C" { - #[doc = " Create a new tree cursor starting from the given node."] - #[doc = ""] - #[doc = " A tree cursor allows you to walk a syntax tree more efficiently than is"] - #[doc = " possible using the `TSNode` functions. It is a mutable object that is always"] - #[doc = " on a certain syntax node, and can be moved imperatively to different nodes."] - pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; + #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the [`TSNode`] functions. 
It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes."] + pub fn ts_tree_cursor_new(node: TSNode) -> TSTreeCursor; } extern "C" { #[doc = " Delete a tree cursor, freeing all of the memory that it used."] - pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); + pub fn ts_tree_cursor_delete(self_: *mut TSTreeCursor); } extern "C" { #[doc = " Re-initialize a tree cursor to start at a different node."] - pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); + pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode); +} +extern "C" { + #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike [`ts_tree_cursor_reset`], this will not lose parent information and\n allows reusing already created cursors."] + pub fn ts_tree_cursor_reset_to(dst: *mut TSTreeCursor, src: *const TSTreeCursor); } extern "C" { #[doc = " Get the tree cursor's current node."] - pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; + pub fn ts_tree_cursor_current_node(self_: *const TSTreeCursor) -> TSNode; } extern "C" { - #[doc = " Get the field name of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns `NULL` if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_name`."] + #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also [`ts_node_child_by_field_name`]."] pub fn ts_tree_cursor_current_field_name( - arg1: *const TSTreeCursor, + self_: *const TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the field id of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns zero if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] - pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; + #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]."] + pub fn ts_tree_cursor_current_field_id(self_: *const TSTreeCursor) -> TSFieldId; +} +extern "C" { + #[doc = " Move the cursor to the parent of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no parent node (the cursor was already on the root node)."] + pub fn ts_tree_cursor_goto_parent(self_: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[doc = " Move the cursor to the next sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no next sibling node."] + pub fn ts_tree_cursor_goto_next_sibling(self_: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, that this function may be slower than\n [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. 
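A sketch of the cursor-based traversal these functions enable, via the Rust crate (hedged: assumes `Tree::walk` and the `TreeCursor` methods from tree-sitter 0.22):

```rust
use tree_sitter::Tree;

// Walk every node in the tree in pre-order using a single cursor, which
// avoids the per-call overhead of the individual `ts_node_*` getters.
fn preorder(tree: &Tree) {
    let mut cursor = tree.walk();
    loop {
        let node = cursor.node();
        println!("{} [{}..{}]", node.kind(), node.start_byte(), node.end_byte());
        if cursor.goto_first_child() {
            continue;
        }
        loop {
            if cursor.goto_next_sibling() {
                break;
            }
            if !cursor.goto_parent() {
                return; // back above the root: traversal complete
            }
        }
    }
}
```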
In\n the worst case, this will need to iterate through all the children up to the\n previous sibling node to recalculate its position."] + pub fn ts_tree_cursor_goto_previous_sibling(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the parent of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no parent node (the cursor was already on the root node)."] - pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the first child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there were no children."] + pub fn ts_tree_cursor_goto_first_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the next sibling of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no next sibling node."] - pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]\n because it needs to iterate through all the children to compute the child's\n position."] + pub fn ts_tree_cursor_goto_last_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the first child of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there were no children."] - pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the node that is the nth descendant of\n the original node that the cursor was constructed with, where\n zero represents the original node itself."] + pub fn ts_tree_cursor_goto_descendant(self_: *mut TSTreeCursor, goal_descendant_index: u32); } extern "C" { - #[doc = " Move the cursor to the first child of its current node that extends beyond"] - #[doc = " the given byte offset or point."] - #[doc = ""] - #[doc = " This returns the index of the child node if one was found, and returns -1"] - #[doc = " if no such child was found."] - pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; + #[doc = " Get the index of the cursor's current node out of all of the\n descendants of the original node that the cursor was constructed with."] + pub fn ts_tree_cursor_current_descendant_index(self_: *const TSTreeCursor) -> u32; } extern "C" { - pub fn ts_tree_cursor_goto_first_child_for_point(arg1: *mut TSTreeCursor, arg2: TSPoint) - -> i64; + #[doc = " Get the depth of the cursor's current node relative to the original\n node that the cursor was constructed with."] + pub fn ts_tree_cursor_current_depth(self_: *const TSTreeCursor) -> u32; } extern "C" { - pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; + #[doc = " Move the cursor to the first child of its current node that extends beyond\n the given byte offset or point.\n\n This returns the index of the child node if one was found, and returns -1\n if no such child was found."] + pub fn ts_tree_cursor_goto_first_child_for_byte( + self_: *mut TSTreeCursor, + goal_byte: u32, + ) -> i64; } extern "C" { - #[doc = " Create a new query from a string 
containing one or more S-expression"] - #[doc = " patterns. The query is associated with a particular language, and can"] - #[doc = " only be run on syntax nodes parsed with that language."] - #[doc = ""] - #[doc = " If all of the given patterns are valid, this returns a `TSQuery`."] - #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] - #[doc = " of information about the problem:"] - #[doc = " 1. The byte offset of the error is written to the `error_offset` parameter."] - #[doc = " 2. The type of error is written to the `error_type` parameter."] + pub fn ts_tree_cursor_goto_first_child_for_point( + self_: *mut TSTreeCursor, + goal_point: TSPoint, + ) -> i64; +} +extern "C" { + pub fn ts_tree_cursor_copy(cursor: *const TSTreeCursor) -> TSTreeCursor; +} +extern "C" { + #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a [`TSQuery`].\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. The type of error is written to the `error_type` parameter."] pub fn ts_query_new( language: *const TSLanguage, source: *const ::std::os::raw::c_char, @@ -637,187 +543,152 @@ extern "C" { } extern "C" { #[doc = " Delete a query, freeing all of the memory that it used."] - pub fn ts_query_delete(arg1: *mut TSQuery); + pub fn ts_query_delete(self_: *mut TSQuery); } extern "C" { #[doc = " Get the number of patterns, captures, or string literals in the query."] - pub fn ts_query_pattern_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_pattern_count(self_: *const TSQuery) -> u32; } extern "C" { - pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_capture_count(self_: *const TSQuery) -> u32; } extern "C" { - pub fn ts_query_string_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_string_count(self_: *const TSQuery) -> u32; } extern "C" { - #[doc = " Get the byte offset where the given pattern starts in the query's source."] - #[doc = ""] - #[doc = " This can be useful when combining queries by concatenating their source"] - #[doc = " code strings."] - pub fn ts_query_start_byte_for_pattern(arg1: *const TSQuery, arg2: u32) -> u32; + #[doc = " Get the byte offset where the given pattern starts in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] + pub fn ts_query_start_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; } extern "C" { - #[doc = " Get all of the predicates for the given pattern in the query."] - #[doc = ""] - #[doc = " The predicates are represented as a single array of steps. There are three"] - #[doc = " types of steps in this array, which correspond to the three legal values for"] - #[doc = " the `type` field:"] - #[doc = " - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names"] - #[doc = " of captures. Their `value_id` can be used with the"] - #[doc = " `ts_query_capture_name_for_id` function to obtain the name of the capture."] - #[doc = " - `TSQueryPredicateStepTypeString` - Steps with this type represent literal"] - #[doc = " strings. 
Their `value_id` can be used with the"] - #[doc = " `ts_query_string_value_for_id` function to obtain their string value."] - #[doc = " - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*"] - #[doc = " that represent the end of an individual predicate. If a pattern has two"] - #[doc = " predicates, then there will be two steps with this `type` in the array."] + #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n [`ts_query_capture_name_for_id`] function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. Their `value_id` can be used with the\n [`ts_query_string_value_for_id`] function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( self_: *const TSQuery, pattern_index: u32, - length: *mut u32, + step_count: *mut u32, ) -> *const TSQueryPredicateStep; } extern "C" { - pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { - pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { - #[doc = " Get the name and length of one of the query's captures, or one of the"] - #[doc = " query's string literals. Each capture and string is associated with a"] - #[doc = " numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the name and length of one of the query's captures, or one of the\n query's string literals. Each capture and string is associated with a\n numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_name_for_id( - arg1: *const TSQuery, - id: u32, + self_: *const TSQuery, + index: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] - #[doc = " with a numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the quantifier of the query's captures. 
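A sketch of compiling and running a query as described above, via the Rust crate (hedged: assumes `Query::new` takes `&Language` in tree-sitter 0.22 and that `tree_sitter_rust` is available; the pattern is a trivial example):

```rust
use tree_sitter::{Parser, Query, QueryCursor};

fn count_identifiers(src: &str) -> usize {
    let language = tree_sitter_rust::language();
    let mut parser = Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(src, None).unwrap();
    // One pattern with a single capture; an invalid pattern would make
    // `Query::new` report the error offset and kind described above.
    let query = Query::new(&language, "(identifier) @id").unwrap();
    let mut cursor = QueryCursor::new();
    cursor
        .matches(&query, tree.root_node(), src.as_bytes())
        .count()
}
```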
Each capture is associated\n with a numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_quantifier_for_id( - arg1: *const TSQuery, - pattern_id: u32, - capture_id: u32, + self_: *const TSQuery, + pattern_index: u32, + capture_index: u32, ) -> TSQuantifier; } extern "C" { pub fn ts_query_string_value_for_id( - arg1: *const TSQuery, - id: u32, + self_: *const TSQuery, + index: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Disable a certain capture within a query."] - #[doc = ""] - #[doc = " This prevents the capture from being returned in matches, and also avoids"] - #[doc = " any resource usage associated with recording the capture. Currently, there"] - #[doc = " is no way to undo this."] + #[doc = " Disable a certain capture within a query.\n\n This prevents the capture from being returned in matches, and also avoids\n any resource usage associated with recording the capture. Currently, there\n is no way to undo this."] pub fn ts_query_disable_capture( - arg1: *mut TSQuery, - arg2: *const ::std::os::raw::c_char, - arg3: u32, + self_: *mut TSQuery, + name: *const ::std::os::raw::c_char, + length: u32, ); } extern "C" { - #[doc = " Disable a certain pattern within a query."] - #[doc = ""] - #[doc = " This prevents the pattern from matching and removes most of the overhead"] - #[doc = " associated with the pattern. Currently, there is no way to undo this."] - pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32); -} -extern "C" { - #[doc = " Create a new cursor for executing a given query."] - #[doc = ""] - #[doc = " The cursor stores the state that is needed to iteratively search"] - #[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"] - #[doc = " to start running a given query on a given syntax node. Then, there are"] - #[doc = " two options for consuming the results of the query:"] - #[doc = " 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] - #[doc = " *matches* in the order that they were found. Each match contains the"] - #[doc = " index of the pattern that matched, and an array of captures. Because"] - #[doc = " multiple patterns can match the same set of nodes, one match may contain"] - #[doc = " captures that appear *before* some of the captures from a previous match."] - #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"] - #[doc = " individual *captures* in the order that they appear. This is useful if"] - #[doc = " don't care about which pattern matched, and just want a single ordered"] - #[doc = " sequence of captures."] - #[doc = ""] - #[doc = " If you don't care about consuming all of the results, you can stop calling"] - #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."] - #[doc = " You can then start executing another query on another node by calling"] - #[doc = " `ts_query_cursor_exec` again."] + #[doc = " Disable a certain pattern within a query.\n\n This prevents the pattern from matching and removes most of the overhead\n associated with the pattern. Currently, there is no way to undo this."] + pub fn ts_query_disable_pattern(self_: *mut TSQuery, pattern_index: u32); +} +extern "C" { + #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call [`ts_query_cursor_exec`]\n to start running a given query on a given syntax node. 
Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the\n individual *captures* in the order that they appear. This is useful if you\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.\n You can then start executing another query on another node by calling\n [`ts_query_cursor_exec`] again."] pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { #[doc = " Delete a query cursor, freeing all of the memory that it used."] - pub fn ts_query_cursor_delete(arg1: *mut TSQueryCursor); + pub fn ts_query_cursor_delete(self_: *mut TSQueryCursor); } extern "C" { #[doc = " Start running a given query on a given node."] - pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); + pub fn ts_query_cursor_exec(self_: *mut TSQueryCursor, query: *const TSQuery, node: TSNode); } extern "C" { - #[doc = " Manage the maximum number of in-progress matches allowed by this query"] - #[doc = " cursor."] - #[doc = ""] - #[doc = " Query cursors have an optional maximum capacity for storing lists of"] - #[doc = " in-progress captures. If this capacity is exceeded, then the"] - #[doc = " earliest-starting match will silently be dropped to make room for further"] - #[doc = " matches. This maximum capacity is optional — by default, query cursors allow"] - #[doc = " any number of pending matches, dynamically allocating new space for them as"] - #[doc = " needed as the query is executed."] - pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; + #[doc = " Manage the maximum number of in-progress matches allowed by this query\n cursor.\n\n Query cursors have an optional maximum capacity for storing lists of\n in-progress captures. If this capacity is exceeded, then the\n earliest-starting match will silently be dropped to make room for further\n matches. 
This maximum capacity is optional — by default, query cursors allow\n any number of pending matches, dynamically allocating new space for them as\n needed as the query is executed."] + pub fn ts_query_cursor_did_exceed_match_limit(self_: *const TSQueryCursor) -> bool; } extern "C" { - pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; + pub fn ts_query_cursor_match_limit(self_: *const TSQueryCursor) -> u32; } extern "C" { - pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); + pub fn ts_query_cursor_set_match_limit(self_: *mut TSQueryCursor, limit: u32); } extern "C" { - #[doc = " Set the range of bytes or (row, column) positions in which the query"] - #[doc = " will be executed."] - pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); + #[doc = " Set the range of bytes or (row, column) positions in which the query\n will be executed."] + pub fn ts_query_cursor_set_byte_range( + self_: *mut TSQueryCursor, + start_byte: u32, + end_byte: u32, + ); } extern "C" { - pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint); + pub fn ts_query_cursor_set_point_range( + self_: *mut TSQueryCursor, + start_point: TSPoint, + end_point: TSPoint, + ); } extern "C" { - #[doc = " Advance to the next match of the currently running query."] - #[doc = ""] - #[doc = " If there is a match, write it to `*match` and return `true`."] - #[doc = " Otherwise, return `false`."] - pub fn ts_query_cursor_next_match(arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch) -> bool; + #[doc = " Advance to the next match of the currently running query.\n\n If there is a match, write it to `*match` and return `true`.\n Otherwise, return `false`."] + pub fn ts_query_cursor_next_match(self_: *mut TSQueryCursor, match_: *mut TSQueryMatch) + -> bool; } extern "C" { - pub fn ts_query_cursor_remove_match(arg1: *mut TSQueryCursor, id: u32); + pub fn ts_query_cursor_remove_match(self_: *mut TSQueryCursor, match_id: u32); } extern "C" { - #[doc = " Advance to the next capture of the currently running query."] - #[doc = ""] - #[doc = " If there is a capture, write its match to `*match` and its index within"] - #[doc = " the matche's capture list to `*capture_index`. Otherwise, return `false`."] + #[doc = " Advance to the next capture of the currently running query.\n\n If there is a capture, write its match to `*match` and its index within\n the match's capture list to `*capture_index`. Otherwise, return `false`."] pub fn ts_query_cursor_next_capture( - arg1: *mut TSQueryCursor, + self_: *mut TSQueryCursor, match_: *mut TSQueryMatch, capture_index: *mut u32, ) -> bool; } +extern "C" { + #[doc = " Set the maximum start depth for a query cursor.\n\n This prevents cursors from exploring children nodes at a certain depth.\n Note if a pattern includes many children, then they will still be checked.\n\n The zero max start depth value can be used as a special behavior and\n it helps to destructure a subtree by staying on a node and using captures\n for interested parts. 
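A sketch of the cursor limits described above, via the Rust crate (hedged: assumes `QueryCursor::set_byte_range`, `set_match_limit`, and `did_exceed_match_limit` from tree-sitter 0.22; the 4096-byte window and 256-match cap are arbitrary examples):

```rust
use tree_sitter::{Node, Query, QueryCursor};

// Confine a query to one byte range and bound its bookkeeping, then
// check whether any in-progress matches were silently dropped.
fn matches_in_prefix(query: &Query, root: Node, src: &[u8]) -> usize {
    let mut cursor = QueryCursor::new();
    cursor.set_byte_range(0..src.len().min(4096));
    cursor.set_match_limit(256);
    let n = cursor.matches(query, root, src).count();
    if cursor.did_exceed_match_limit() {
        eprintln!("warning: some matches were dropped");
    }
    n
}
```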
Note that the zero max start depth only limits the search\n depth for a pattern's root node, but other nodes that are part of the pattern\n may be searched at any depth, as defined by the pattern structure.\n\n Set to `UINT32_MAX` to remove the maximum start depth."] + pub fn ts_query_cursor_set_max_start_depth(self_: *mut TSQueryCursor, max_start_depth: u32); +} +extern "C" { + #[doc = " Get another reference to the given language."] + pub fn ts_language_copy(self_: *const TSLanguage) -> *const TSLanguage; +} +extern "C" { + #[doc = " Free any dynamically-allocated resources for this language, if\n this is the last reference."] + pub fn ts_language_delete(self_: *const TSLanguage); +} extern "C" { #[doc = " Get the number of distinct node types in the language."] - pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; + pub fn ts_language_symbol_count(self_: *const TSLanguage) -> u32; +} +extern "C" { + #[doc = " Get the number of valid states in this language."] + pub fn ts_language_state_count(self_: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get a node type string for the given numerical id."] pub fn ts_language_symbol_name( - arg1: *const TSLanguage, - arg2: TSSymbol, + self_: *const TSLanguage, + symbol: TSSymbol, ) -> *const ::std::os::raw::c_char; } extern "C" { @@ -831,53 +702,145 @@ extern "C" { } extern "C" { #[doc = " Get the number of distinct field names in the language."] - pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32; + pub fn ts_language_field_count(self_: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get the field name string for the given numerical id."] pub fn ts_language_field_name_for_id( - arg1: *const TSLanguage, - arg2: TSFieldId, + self_: *const TSLanguage, + id: TSFieldId, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the numerical id for the given field name string."] pub fn ts_language_field_id_for_name( - arg1: *const TSLanguage, - arg2: *const ::std::os::raw::c_char, - arg3: u32, + self_: *const TSLanguage, + name: *const ::std::os::raw::c_char, + name_length: u32, ) -> TSFieldId; } extern "C" { - #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,"] - #[doc = " or a hidden nodes."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`. Hidden nodes are never returned from the API."] - pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; -} -extern "C" { - #[doc = " Get the ABI version number for this language. This version number is used"] - #[doc = " to ensure that languages were generated by a compatible version of"] - #[doc = " Tree-sitter."] - #[doc = ""] - #[doc = " See also `ts_parser_set_language`."] - pub fn ts_language_version(arg1: *const TSLanguage) -> u32; -} -extern "C" { - #[doc = " Set the allocation functions used by the library."] - #[doc = ""] - #[doc = " By default, Tree-sitter uses the standard libc allocation functions,"] - #[doc = " but aborts the process when an allocation fails. This function lets"] - #[doc = " you supply alternative allocation functions at runtime."] - #[doc = ""] - #[doc = " If you pass `NULL` for any parameter, Tree-sitter will switch back to"] - #[doc = " its default implementation of that function."] - #[doc = ""] - #[doc = " If you call this function after the library has already been used, then"] - #[doc = " you must ensure that either:"] - #[doc = " 1. All the existing objects have been freed."] - #[doc = " 2. 
The new allocator shares its state with the old one, so it is capable"] - #[doc = " of freeing memory that was allocated by the old allocator."] + #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or hidden nodes.\n\n See also [`ts_node_is_named`]. Hidden nodes are never returned from the API."] + pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[doc = " Get the ABI version number for this language. This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] + pub fn ts_language_version(self_: *const TSLanguage) -> u32; +} +extern "C" { + #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."] + pub fn ts_language_next_state( + self_: *const TSLanguage, + state: TSStateId, + symbol: TSSymbol, + ) -> TSStateId; +} +extern "C" { + #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using [`ts_lookahead_iterator_next`] and\n [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] + pub fn ts_lookahead_iterator_new( + self_: *const TSLanguage, + state: TSStateId, + ) -> *mut TSLookaheadIterator; +} +extern "C" { + #[doc = " Delete a lookahead iterator, freeing all the memory used."] + pub fn ts_lookahead_iterator_delete(self_: *mut TSLookaheadIterator); +} +extern "C" { + #[doc = " Reset the lookahead iterator to another state.\n\n This returns `true` if the iterator was reset to the given state and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset_state( + self_: *mut TSLookaheadIterator, + state: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Reset the lookahead iterator.\n\n This returns `true` if the language was set successfully and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset( + self_: *mut TSLookaheadIterator, + language: *const TSLanguage, + state: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Get the current language of the lookahead iterator."] + pub fn ts_lookahead_iterator_language(self_: *const TSLookaheadIterator) -> *const TSLanguage; +} +extern "C" { + #[doc = " Advance the lookahead iterator to the next symbol.\n\n This returns `true` if there is a new symbol and `false` otherwise."] + pub fn ts_lookahead_iterator_next(self_: *mut TSLookaheadIterator) -> bool; +} +extern "C" { + #[doc = " Get the current symbol of the lookahead iterator."] + pub fn ts_lookahead_iterator_current_symbol(self_: *const TSLookaheadIterator) -> TSSymbol; +} +extern "C" { + #[doc = " Get the current symbol type of the lookahead iterator as a null-terminated\n string."] + pub fn ts_lookahead_iterator_current_symbol_name( + self_: *const TSLookaheadIterator, + ) -> *const ::std::os::raw::c_char; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct wasm_engine_t { + _unused: [u8; 0], +} +pub type TSWasmEngine 
= wasm_engine_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSWasmStore { + _unused: [u8; 0], +} +pub const TSWasmErrorKindNone: TSWasmErrorKind = 0; +pub const TSWasmErrorKindParse: TSWasmErrorKind = 1; +pub const TSWasmErrorKindCompile: TSWasmErrorKind = 2; +pub const TSWasmErrorKindInstantiate: TSWasmErrorKind = 3; +pub const TSWasmErrorKindAllocate: TSWasmErrorKind = 4; +pub type TSWasmErrorKind = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSWasmError { + pub kind: TSWasmErrorKind, + pub message: *mut ::std::os::raw::c_char, +} +extern "C" { + #[doc = " Create a Wasm store."] + pub fn ts_wasm_store_new( + engine: *mut TSWasmEngine, + error: *mut TSWasmError, + ) -> *mut TSWasmStore; +} +extern "C" { + #[doc = " Free the memory associated with the given Wasm store."] + pub fn ts_wasm_store_delete(arg1: *mut TSWasmStore); +} +extern "C" { + #[doc = " Create a language from a buffer of Wasm. The resulting language behaves\n like any other Tree-sitter language, except that in order to use it with\n a parser, that parser must have a Wasm store. Note that the language\n can be used with any Wasm store; it doesn't need to be the same store that\n was used to originally load it."] + pub fn ts_wasm_store_load_language( + arg1: *mut TSWasmStore, + name: *const ::std::os::raw::c_char, + wasm: *const ::std::os::raw::c_char, + wasm_len: u32, + error: *mut TSWasmError, + ) -> *const TSLanguage; +} +extern "C" { + #[doc = " Get the number of languages instantiated in the given Wasm store."] + pub fn ts_wasm_store_language_count(arg1: *const TSWasmStore) -> usize; +} +extern "C" { + #[doc = " Check if the language came from a Wasm module. If so, then in order to use\n this language with a Parser, that parser must have a Wasm store assigned."] + pub fn ts_language_is_wasm(arg1: *const TSLanguage) -> bool; +} +extern "C" { + #[doc = " Assign the given Wasm store to the parser. A parser must have a Wasm store\n in order to use Wasm languages."] + pub fn ts_parser_set_wasm_store(arg1: *mut TSParser, arg2: *mut TSWasmStore); +} +extern "C" { + #[doc = " Remove the parser's current Wasm store and return it. This returns NULL if\n the parser doesn't have a Wasm store."] + pub fn ts_parser_take_wasm_store(arg1: *mut TSParser) -> *mut TSWasmStore; +} +extern "C" { + #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. 
The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."] pub fn ts_set_allocator( new_malloc: ::std::option::Option< unsafe extern "C" fn(arg1: usize) -> *mut ::std::os::raw::c_void, @@ -894,6 +857,3 @@ extern "C" { new_free: ::std::option::Option, ); } - -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 14; -pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff --git a/lib/binding_rust/build.rs b/lib/binding_rust/build.rs index 5798cde..19cb0f6 100644 --- a/lib/binding_rust/build.rs +++ b/lib/binding_rust/build.rs @@ -1,47 +1,88 @@ -use std::path::{Path, PathBuf}; -use std::{env, fs}; +use std::{ + env, fs, + path::{Path, PathBuf}, +}; fn main() { - println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); - if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { - if let (Some(clang_path), Some(scan_build_path)) = (which("clang"), which("scan-build")) { - let clang_path = clang_path.to_str().unwrap(); - let scan_build_path = scan_build_path.to_str().unwrap(); - env::set_var( - "CC", - &format!( - "{} -analyze-headers --use-analyzer={} cc", - scan_build_path, clang_path - ), - ); - } + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + + #[cfg(feature = "bindgen")] + generate_bindings(&out_dir); + + fs::copy( + "src/wasm/stdlib-symbols.txt", + out_dir.join("stdlib-symbols.txt"), + ) + .unwrap(); + + let mut config = cc::Build::new(); + + println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WASM"); + if env::var("CARGO_FEATURE_WASM").is_ok() { + config + .define("TREE_SITTER_FEATURE_WASM", "") + .define("static_assert(...)", "") + .include(env::var("DEP_WASMTIME_C_API_INCLUDE").unwrap()) + .include(env::var("DEP_WASMTIME_C_API_WASM_INCLUDE").unwrap()); } - let src_path = Path::new("src"); + let manifest_path = Path::new(env!("CARGO_MANIFEST_DIR")); + let include_path = manifest_path.join("include"); + let src_path = manifest_path.join("src"); + let wasm_path = src_path.join("wasm"); for entry in fs::read_dir(&src_path).unwrap() { let entry = entry.unwrap(); let path = src_path.join(entry.file_name()); println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); } - cc::Build::new() - .flag_if_supported("-std=c99") + config + .flag_if_supported("-std=c11") + .flag_if_supported("-fvisibility=hidden") + .flag_if_supported("-Wshadow") .flag_if_supported("-Wno-unused-parameter") - .include(src_path) - .include("include") + .include(&src_path) + .include(&wasm_path) + .include(&include_path) .file(src_path.join("lib.c")) .compile("tree-sitter"); + + println!("cargo:include={}", include_path.display()); } -fn which(exe_name: impl AsRef) -> Option { - env::var_os("PATH").and_then(|paths| { - env::split_paths(&paths).find_map(|dir| { - let full_path = dir.join(&exe_name); - if full_path.is_file() { - Some(full_path) - } else { - None - } - }) - }) +#[cfg(feature = "bindgen")] +fn generate_bindings(out_dir: &Path) { + const HEADER_PATH: &str = "include/tree_sitter/api.h"; + + println!("cargo:rerun-if-changed={HEADER_PATH}"); + + let no_copy = [ + "TSInput", + "TSLanguage", + "TSLogger", + "TSLookaheadIterator", + "TSParser", + "TSTree", + "TSQuery", + "TSQueryCursor", + "TSQueryCapture", + "TSQueryMatch", + "TSQueryPredicateStep", + ]; + + let bindings = bindgen::Builder::default() + .header(HEADER_PATH) + .layout_tests(false) + .allowlist_type("^TS.*") + .allowlist_function("^ts_.*") + .allowlist_var("^TREE_SITTER.*") + .no_copy(no_copy.join("|")) + .prepend_enum_name(false) + .generate() + 
.expect("Failed to generate bindings"); + + let bindings_rs = out_dir.join("bindings.rs"); + bindings + .write_to_file(&bindings_rs) + .unwrap_or_else(|_| panic!("Failed to write bindings into path: {bindings_rs:?}")); } diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs index 685ed76..9168de1 100644 --- a/lib/binding_rust/ffi.rs +++ b/lib/binding_rust/ffi.rs @@ -2,8 +2,175 @@ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] +#[cfg(feature = "bindgen")] +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + +#[cfg(not(feature = "bindgen"))] include!("./bindings.rs"); +#[cfg(any(unix, target_os = "wasi"))] +extern "C" { + pub(crate) fn _ts_dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; +} + +#[cfg(windows)] extern "C" { - pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; + pub(crate) fn _ts_dup(handle: *mut std::os::raw::c_void) -> std::os::raw::c_int; +} + +use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str}; + +use crate::{ + Language, LookaheadIterator, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor, +}; + +impl Language { + /// Reconstructs a [`Language`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *const TSLanguage) -> Self { + Self(ptr) + } + + /// Consumes the [`Language`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *const TSLanguage { + ManuallyDrop::new(self).0 + } +} + +impl Parser { + /// Reconstructs a [`Parser`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSParser) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`Parser`], returning a raw pointer to the underlying C structure. + /// + /// # Safety + /// + /// It is the caller's responsibility to adjust the parser's state, + /// such as disabling logging or dot graph printing, if this + /// may cause issues like use-after-free. + #[must_use] + pub fn into_raw(self) -> *mut TSParser { + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl Tree { + /// Reconstructs a [`Tree`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSTree) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`Tree`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSTree { + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl<'tree> Node<'tree> { + /// Reconstructs a [`Node`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(raw: TSNode) -> Self { + Self(raw, PhantomData) + } + + /// Consumes the [`Node`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> TSNode { + ManuallyDrop::new(self).0 + } +} + +impl<'a> TreeCursor<'a> { + /// Reconstructs a [`TreeCursor`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(raw: TSTreeCursor) -> Self { + Self(raw, PhantomData) + } + + /// Consumes the [`TreeCursor`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> TSTreeCursor { + ManuallyDrop::new(self).0 + } +} + +impl Query { + /// Reconstructs a [`Query`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. 
+ pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result { + Self::from_raw_parts(ptr, source) + } + + /// Consumes the [`Query`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSQuery { + ManuallyDrop::new(self).ptr.as_ptr() + } +} + +impl QueryCursor { + /// Reconstructs a [`QueryCursor`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSQueryCursor) -> Self { + Self { + ptr: NonNull::new_unchecked(ptr), + } + } + + /// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSQueryCursor { + ManuallyDrop::new(self).ptr.as_ptr() + } +} + +impl LookaheadIterator { + /// Reconstructs a [`LookaheadIterator`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSLookaheadIterator { + ManuallyDrop::new(self).0.as_ptr() + } } diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 579bf8e..14a0d21 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1,23 +1,32 @@ -mod ffi; -mod util; +#![doc = include_str!("./README.md")] -#[cfg(unix)] -use std::os::unix::io::AsRawFd; +pub mod ffi; +mod util; +#[cfg(any(unix, target_os = "wasi"))] +use std::os::fd::AsRawFd; +#[cfg(windows)] +use std::os::windows::io::AsRawHandle; use std::{ char, error, ffi::CStr, - fmt, hash, iter, + fmt::{self, Write}, + hash, iter, marker::PhantomData, mem::MaybeUninit, - ops, + num::NonZeroU16, + ops::{self, Deref}, os::raw::{c_char, c_void}, ptr::{self, NonNull}, slice, str, sync::atomic::AtomicUsize, - u16, }; +#[cfg(feature = "wasm")] +mod wasm_language; +#[cfg(feature = "wasm")] +pub use wasm_language::*; + /// The latest ABI version that is supported by the current version of the /// library. /// @@ -26,22 +35,26 @@ use std::{ /// The Tree-sitter library is generally backwards-compatible with languages /// generated using older CLI versions, but is not forwards-compatible. #[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")] -pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; +pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize; /// The earliest ABI version that is supported by the current version of the /// library. #[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")] -pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION; +pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize; -pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); +pub const ARRAY_HEADER: &str = include_str!("../src/array.h"); +pub const PARSER_HEADER: &str = include_str!("../src/parser.h"); -/// An opaque object that defines how to parse a particular language. The code for each -/// `Language` is generated by the Tree-sitter CLI. +/// An opaque object that defines how to parse a particular language. The code +/// for each `Language` is generated by the Tree-sitter CLI. 
#[doc(alias = "TSLanguage")] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct Language(*const ffi::TSLanguage); +pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>); + /// A tree that represents the syntactic structure of a source code file. #[doc(alias = "TSTree")] pub struct Tree(NonNull); @@ -55,9 +68,9 @@ pub struct Point { pub column: usize, } -/// A range of positions in a multi-line text document, both in terms of bytes and of -/// rows and columns. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +/// A range of positions in a multi-line text document, both in terms of bytes +/// and of rows and columns. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct Range { pub start_byte: usize, pub end_byte: usize, @@ -76,16 +89,23 @@ pub struct InputEdit { pub new_end_position: Point, } -/// A single node within a syntax `Tree`. +/// A single node within a syntax [`Tree`]. #[doc(alias = "TSNode")] #[derive(Clone, Copy)] #[repr(transparent)] -pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); +pub struct Node<'tree>(ffi::TSNode, PhantomData<&'tree ()>); -/// A stateful object that this is used to produce a `Tree` based on some source code. +/// A stateful object that this is used to produce a [`Tree`] based on some +/// source code. #[doc(alias = "TSParser")] pub struct Parser(NonNull); +/// A stateful object that is used to look up symbols valid in a specific parse +/// state +#[doc(alias = "TSLookaheadIterator")] +pub struct LookaheadIterator(NonNull); +struct LookaheadNamesIterator<'a>(&'a mut LookaheadIterator); + /// A type of log message. #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -93,24 +113,27 @@ pub enum LogType { Lex, } +type FieldId = NonZeroU16; + /// A callback that receives log messages during parser. type Logger<'a> = Box; -/// A stateful object for walking a syntax `Tree` efficiently. +/// A stateful object for walking a syntax [`Tree`] efficiently. #[doc(alias = "TSTreeCursor")] -pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); /// A set of patterns that match nodes in a syntax tree. 
#[doc(alias = "TSQuery")] #[derive(Debug)] +#[allow(clippy::type_complexity)] pub struct Query { ptr: NonNull, - capture_names: Vec, - capture_quantifiers: Vec>, - text_predicates: Vec>, - property_settings: Vec>, - property_predicates: Vec>, - general_predicates: Vec>, + capture_names: Box<[&'static str]>, + capture_quantifiers: Box<[Box<[CaptureQuantifier]>]>, + text_predicates: Box<[Box<[TextPredicateCapture]>]>, + property_settings: Box<[Box<[QueryProperty]>]>, + property_predicates: Box<[Box<[(QueryProperty, bool)]>]>, + general_predicates: Box<[Box<[QueryPredicate]>]>, } /// A quantifier for captures @@ -126,23 +149,23 @@ pub enum CaptureQuantifier { impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, - ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, - ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, - ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, - ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, - _ => panic!("Unrecognized quantifier: {}", value), + ffi::TSQuantifierZero => Self::Zero, + ffi::TSQuantifierZeroOrOne => Self::ZeroOrOne, + ffi::TSQuantifierZeroOrMore => Self::ZeroOrMore, + ffi::TSQuantifierOne => Self::One, + ffi::TSQuantifierOneOrMore => Self::OneOrMore, + _ => panic!("Unrecognized quantifier: {value}"), } } } -/// A stateful object for executing a `Query` on a syntax `Tree`. +/// A stateful object for executing a [`Query`] on a syntax [`Tree`]. #[doc(alias = "TSQueryCursor")] pub struct QueryCursor { ptr: NonNull, } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryProperty { pub key: Box, @@ -156,14 +179,14 @@ pub enum QueryPredicateArg { String(Box), } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryPredicate { pub operator: Box, - pub args: Vec, + pub args: Box<[QueryPredicateArg]>, } -/// A match of a `Query` to a particular set of `Node`s. +/// A match of a [`Query`] to a particular set of [`Node`]s. pub struct QueryMatch<'cursor, 'tree> { pub pattern_index: usize, pub captures: &'cursor [QueryCapture<'tree>], @@ -171,50 +194,55 @@ pub struct QueryMatch<'cursor, 'tree> { cursor: *mut ffi::TSQueryCursor, } -/// A sequence of `QueryMatch`es associated with a given `QueryCursor`. -pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> { +/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`]. +pub struct QueryMatches<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, - query: &'a Query, + query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, - _tree: PhantomData<&'tree ()>, + _phantom: PhantomData<(&'cursor (), I)>, } -/// A sequence of `QueryCapture`s associated with a given `QueryCursor`. -pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> { +/// A sequence of [`QueryCapture`]s associated with a given [`QueryCursor`]. 
+pub struct QueryCaptures<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, - query: &'a Query, + query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, - _tree: PhantomData<&'tree ()>, + _phantom: PhantomData<(&'cursor (), I)>, } -pub trait TextProvider<'a> { - type I: Iterator + 'a; +pub trait TextProvider +where + I: AsRef<[u8]>, +{ + type I: Iterator; fn text(&mut self, node: Node) -> Self::I; } -/// A particular `Node` that has been captured with a particular name within a `Query`. +/// A particular [`Node`] that has been captured with a particular name within a +/// [`Query`]. #[derive(Clone, Copy, Debug)] #[repr(C)] -pub struct QueryCapture<'a> { - pub node: Node<'a>, +pub struct QueryCapture<'tree> { + pub node: Node<'tree>, pub index: u32, } -/// An error that occurred when trying to assign an incompatible `Language` to a `Parser`. +/// An error that occurred when trying to assign an incompatible [`Language`] to +/// a [`Parser`]. #[derive(Debug, PartialEq, Eq)] pub struct LanguageError { version: usize, } -/// An error that occurred in `Parser::set_included_ranges`. +/// An error that occurred in [`Parser::set_included_ranges`]. #[derive(Debug, PartialEq, Eq)] pub struct IncludedRangesError(pub usize); -/// An error that occurred when trying to create a `Query`. +/// An error that occurred when trying to create a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryError { pub row: usize, @@ -236,10 +264,16 @@ pub enum QueryErrorKind { } #[derive(Debug)] -enum TextPredicate { - CaptureEqString(u32, String, bool), - CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex, bool), +/// The first item is the capture index +/// The next is capture specific, depending on what item is expected +/// The first bool is if the capture is positive +/// The last item is a bool signifying whether or not it's meant to match +/// any or all captures +enum TextPredicateCapture { + EqString(u32, Box, bool, bool), + EqCapture(u32, u32, bool, bool), + MatchString(u32, regex::bytes::Regex, bool, bool), + AnyString(u32, Box<[Box]>, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` @@ -250,37 +284,44 @@ pub struct LossyUtf8<'a> { } impl Language { - /// Get the ABI version number that indicates which version of the Tree-sitter CLI - /// that was used to generate this `Language`. + /// Get the ABI version number that indicates which version of the + /// Tree-sitter CLI that was used to generate this [`Language`]. #[doc(alias = "ts_language_version")] + #[must_use] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } } /// Get the number of distinct node types in this language. #[doc(alias = "ts_language_symbol_count")] + #[must_use] pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } } + /// Get the number of valid states in this language. + #[doc(alias = "ts_language_state_count")] + #[must_use] + pub fn parse_state_count(&self) -> usize { + unsafe { ffi::ts_language_state_count(self.0) as usize } + } + /// Get the name of the node kind for the given numerical id. 
#[doc(alias = "ts_language_symbol_name")] + #[must_use] pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_symbol_name(self.0, id) }; - if ptr.is_null() { - None - } else { - Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - } + (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numeric id for the given node kind. #[doc(alias = "ts_language_symbol_for_name")] + #[must_use] pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 { unsafe { ffi::ts_language_symbol_for_name( self.0, - kind.as_bytes().as_ptr() as *const c_char, + kind.as_bytes().as_ptr().cast::(), kind.len() as u32, named, ) @@ -289,99 +330,156 @@ impl Language { /// Check if the node type for the given numerical id is named (as opposed /// to an anonymous node type). + #[must_use] pub fn node_kind_is_named(&self, id: u16) -> bool { - unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeRegular } } #[doc(alias = "ts_language_symbol_type")] + #[must_use] pub fn node_kind_is_visible(&self, id: u16) -> bool { - unsafe { - ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolType_TSSymbolTypeAnonymous - } + unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolTypeAnonymous } } /// Get the number of distinct field names in this language. #[doc(alias = "ts_language_field_count")] + #[must_use] pub fn field_count(&self) -> usize { unsafe { ffi::ts_language_field_count(self.0) as usize } } /// Get the field names for the given numerical id. #[doc(alias = "ts_language_field_name_for_id")] + #[must_use] pub fn field_name_for_id(&self, field_id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_field_name_for_id(self.0, field_id) }; - if ptr.is_null() { - None - } else { - Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - } + (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numerical id for the given field name. #[doc(alias = "ts_language_field_id_for_name")] - pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { + #[must_use] + pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); let id = unsafe { ffi::ts_language_field_id_for_name( self.0, - field_name.as_ptr() as *const c_char, + field_name.as_ptr().cast::(), field_name.len() as u32, ) }; - if id == 0 { - None - } else { - Some(id) - } + FieldId::new(id) + } + + /// Get the next parse state. Combine this with + /// [`lookahead_iterator`](Language::lookahead_iterator) to + /// generate completion suggestions or valid symbols in error nodes. + /// + /// Example: + /// ``` + /// let state = language.next_state(node.parse_state(), node.grammar_id()); + /// ``` + #[doc(alias = "ts_language_next_state")] + #[must_use] + pub fn next_state(&self, state: u16, id: u16) -> u16 { + unsafe { ffi::ts_language_next_state(self.0, state, id) } + } + + /// Create a new lookahead iterator for this language and parse state. + /// + /// This returns `None` if state is invalid for this language. + /// + /// Iterating [`LookaheadIterator`] will yield valid symbols in the given + /// parse state. Newly created lookahead iterators will return the `ERROR` + /// symbol from [`LookaheadIterator::current_symbol`]. + /// + /// Lookahead iterators can be useful to generate suggestions and improve + /// syntax error diagnostics. 
To get symbols valid in an ERROR node, use the + /// lookahead iterator on its first leaf node state. For `MISSING` nodes, a + /// lookahead iterator created on the previous non-extra leaf node may be + /// appropriate. + #[doc(alias = "ts_lookahead_iterator_new")] + #[must_use] + pub fn lookahead_iterator(&self, state: u16) -> Option<LookaheadIterator> { + let ptr = unsafe { ffi::ts_lookahead_iterator_new(self.0, state) }; + (!ptr.is_null()).then(|| unsafe { LookaheadIterator::from_raw(ptr) }) + } +} + +impl Clone for Language { + fn clone(&self) -> Self { + unsafe { Self(ffi::ts_language_copy(self.0)) } + } +} + +impl Drop for Language { + fn drop(&mut self) { + unsafe { ffi::ts_language_delete(self.0) } + } +} + +impl<'a> Deref for LanguageRef<'a> { + type Target = Language; + + fn deref(&self) -> &Self::Target { + unsafe { &*(std::ptr::addr_of!(self.0).cast::<Language>()) } + } +} + +impl Default for Parser { + fn default() -> Self { + Self::new() } } impl Parser { /// Create a new parser. - pub fn new() -> Parser { + #[doc(alias = "ts_parser_new")] + #[must_use] + pub fn new() -> Self { unsafe { let parser = ffi::ts_parser_new(); - Parser(NonNull::new_unchecked(parser)) + Self(NonNull::new_unchecked(parser)) } } /// Set the language that the parser should use for parsing. /// /// Returns a Result indicating whether or not the language was successfully - /// assigned. True means assignment succeeded. False means there was a version - /// mismatch: the language was generated with an incompatible version of the - /// Tree-sitter CLI. Check the language's version using [Language::version] - /// and compare it to this library's [LANGUAGE_VERSION](LANGUAGE_VERSION) and - /// [MIN_COMPATIBLE_LANGUAGE_VERSION](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. + /// assigned. True means assignment succeeded. False means there was a + /// version mismatch: the language was generated with an incompatible + /// version of the Tree-sitter CLI. Check the language's version using + /// [`Language::version`] and compare it to this library's + /// [`LANGUAGE_VERSION`](LANGUAGE_VERSION) and + /// [`MIN_COMPATIBLE_LANGUAGE_VERSION`](MIN_COMPATIBLE_LANGUAGE_VERSION) + /// constants. #[doc(alias = "ts_parser_set_language")] - pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { + pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> { let version = language.version(); - if version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION { - Err(LanguageError { version }) - } else { + if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) { unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0); } Ok(()) + } else { + Err(LanguageError { version }) } } /// Get the parser's current language. #[doc(alias = "ts_parser_language")] + #[must_use] pub fn language(&self) -> Option<Language> { let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) }; - if ptr.is_null() { - None - } else { - Some(Language(ptr)) - } + (!ptr.is_null()).then(|| Language(ptr)) } /// Get the parser's current logger. #[doc(alias = "ts_parser_logger")] + #[must_use] pub fn logger(&self) -> Option<&Logger> { let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; - unsafe { logger.payload.cast::<Logger>().as_ref() } + unsafe { logger.payload.cast::<Logger>().as_ref() } } /// Set the logging callback that a parser should use during parsing. 
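A minimal usage sketch of the API change above (an editor's illustration, assuming the crate's usual `tree_sitter` import path; not part of the patch): `set_language` now borrows the language, because `Language` is reference-counted through `ts_language_copy`/`ts_language_delete` and no longer implements `Copy`.

```rust
use tree_sitter::{Language, LanguageError, Parser};

// Build a parser for `language`, surfacing the ABI-version mismatch that
// `set_language` reports for grammars built by an incompatible CLI.
fn parser_for(language: &Language) -> Result<Parser, LanguageError> {
    let mut parser = Parser::new();
    parser.set_language(language)?; // takes `&Language` as of this update
    Ok(parser)
}
```

Since `Clone for Language` only takes another reference, handing clones to several parsers stays cheap.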
@@ -389,7 +487,7 @@ impl Parser { pub fn set_logger(&mut self, logger: Option) { let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; if !prev_logger.payload.is_null() { - drop(unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }); + drop(unsafe { Box::from_raw(prev_logger.payload.cast::()) }); } let c_logger; @@ -401,9 +499,9 @@ impl Parser { c_log_type: ffi::TSLogType, c_message: *const c_char, ) { - let callback = (payload as *mut Logger).as_mut().unwrap(); + let callback = payload.cast::().as_mut().unwrap(); if let Ok(message) = CStr::from_ptr(c_message).to_str() { - let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + let log_type = if c_log_type == ffi::TSLogTypeParse { LogType::Parse } else { LogType::Lex @@ -415,7 +513,7 @@ impl Parser { let raw_container = Box::into_raw(container); c_logger = ffi::TSLogger { - payload: raw_container as *mut c_void, + payload: raw_container.cast::(), log: Some(log), }; } else { @@ -429,14 +527,30 @@ impl Parser { } /// Set the destination to which the parser should write debugging graphs - /// during parsing. The graphs are formatted in the DOT language. You may want - /// to pipe these graphs directly to a `dot(1)` process in order to generate - /// SVG output. - #[cfg(unix)] + /// during parsing. The graphs are formatted in the DOT language. You may + /// want to pipe these graphs directly to a `dot(1)` process in order to + /// generate SVG output. #[doc(alias = "ts_parser_print_dot_graphs")] - pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) { - let fd = file.as_raw_fd(); - unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::dup(fd)) } + pub fn print_dot_graphs( + &mut self, + #[cfg(any(unix, target_os = "wasi"))] file: &impl AsRawFd, + #[cfg(windows)] file: &impl AsRawHandle, + ) { + #[cfg(any(unix, target_os = "wasi"))] + { + let fd = file.as_raw_fd(); + unsafe { + ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(fd)); + } + } + + #[cfg(windows)] + { + let handle = file.as_raw_handle(); + unsafe { + ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(handle)); + } + } } /// Stop the parser from printing debugging graphs while parsing. @@ -449,21 +563,20 @@ impl Parser { /// /// # Arguments: /// * `text` The UTF8-encoded text to parse. - /// * `old_tree` A previous syntax tree parsed from the same document. - /// If the text of the document has changed since `old_tree` was - /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. - /// - /// Returns a [Tree] if parsing succeeded, or `None` if: - /// * The parser has not yet had a language assigned with [Parser::set_language] - /// * The timeout set with [Parser::set_timeout_micros] expired - /// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped + /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the + /// document has changed since `old_tree` was created, then you must edit `old_tree` to match + /// the new text using [`Tree::edit`]. 
+ /// + /// Returns a [`Tree`] if parsing succeeded, or `None` if: + /// * The parser has not yet had a language assigned with [`Parser::set_language`] + /// * The timeout set with [`Parser::set_timeout_micros`] expired + /// * The cancellation flag set with [`Parser::set_cancellation_flag`] was flipped #[doc(alias = "ts_parser_parse")] pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = text.as_ref(); let len = bytes.len(); self.parse_with( - &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + &mut |i, _| (i < len).then(|| &bytes[i..]).unwrap_or_default(), old_tree, ) } @@ -472,10 +585,9 @@ impl Parser { /// /// # Arguments: /// * `text` The UTF16-encoded text to parse. - /// * `old_tree` A previous syntax tree parsed from the same document. - /// If the text of the document has changed since `old_tree` was - /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the + /// document has changed since `old_tree` was created, then you must edit `old_tree` to match + /// the new text using [`Tree::edit`]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, @@ -484,7 +596,7 @@ impl Parser { let code_points = input.as_ref(); let len = code_points.len(); self.parse_utf16_with( - &mut |i, _| if i < len { &code_points[i..] } else { &[] }, + &mut |i, _| (i < len).then(|| &code_points[i..]).unwrap_or_default(), old_tree, ) } @@ -492,15 +604,14 @@ impl Parser { /// Parse UTF8 text provided in chunks by a callback. /// /// # Arguments: - /// * `callback` A function that takes a byte offset and position and - /// returns a slice of UTF8-encoded text starting at that byte offset - /// and position. The slices can be of any length. If the given position - /// is at the end of the text, the callback should return an empty slice. - /// * `old_tree` A previous syntax tree parsed from the same document. - /// If the text of the document has changed since `old_tree` was - /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. - pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( + /// * `callback` A function that takes a byte offset and position and returns a slice of + /// UTF8-encoded text starting at that byte offset and position. The slices can be of any + /// length. If the given position is at the end of the text, the callback should return an + /// empty slice. + /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the + /// document has changed since `old_tree` was created, then you must edit `old_tree` to match + /// the new text using [`Tree::edit`]. + pub fn parse_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, @@ -508,28 +619,28 @@ impl Parser { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. - // 2. The text that was returned from the previous call to `callback`. - // This allows the callback to return owned values like vectors. + // 2. The text that was returned from the previous call to `callback`. This allows the + // callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. 
- unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( + unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback(byte_offset as usize, position.into())); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; - return slice.as_ptr() as *const c_char; + slice.as_ptr().cast::() } let c_input = ffi::TSInput { - payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + payload: std::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + encoding: ffi::TSInputEncodingUTF8, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -542,15 +653,14 @@ impl Parser { /// Parse UTF16 text provided in chunks by a callback. /// /// # Arguments: - /// * `callback` A function that takes a code point offset and position and - /// returns a slice of UTF16-encoded text starting at that byte offset - /// and position. The slices can be of any length. If the given position - /// is at the end of the text, the callback should return an empty slice. - /// * `old_tree` A previous syntax tree parsed from the same document. - /// If the text of the document has changed since `old_tree` was - /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. - pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( + /// * `callback` A function that takes a code point offset and position and returns a slice of + /// UTF16-encoded text starting at that byte offset and position. The slices can be of any + /// length. If the given position is at the end of the text, the callback should return an + /// empty slice. + /// * `old_tree` A previous syntax tree parsed from the same document. If the text of the + /// document has changed since `old_tree` was created, then you must edit `old_tree` to match + /// the new text using [`Tree::edit`]. + pub fn parse_utf16_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, @@ -558,18 +668,18 @@ impl Parser { // A pointer to this payload is passed on every call to the `read` C function. // The payload contains two things: // 1. A reference to the rust `callback`. - // 2. The text that was returned from the previous call to `callback`. - // This allows the callback to return owned values like vectors. + // 2. The text that was returned from the previous call to `callback`. This allows the + // callback to return owned values like vectors. let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. 
- unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( + unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback( (byte_offset / 2) as usize, Point { @@ -579,13 +689,13 @@ impl Parser { )); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; - slice.as_ptr() as *const c_char + slice.as_ptr().cast::() } let c_input = ffi::TSInput { - payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + payload: std::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + encoding: ffi::TSInputEncodingUTF16, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -597,10 +707,11 @@ impl Parser { /// Instruct the parser to start the next parse from the beginning. /// - /// If the parser previously failed because of a timeout or a cancellation, then - /// by default, it will resume where it left off on the next call to `parse` or - /// other parsing functions. If you don't want to resume, and instead intend to - /// use this parser to parse some other document, you must call `reset` first. + /// If the parser previously failed because of a timeout or a cancellation, + /// then by default, it will resume where it left off on the next call + /// to [`parse`](Parser::parse) or other parsing functions. If you don't + /// want to resume, and instead intend to use this parser to parse some + /// other document, you must call `reset` first. #[doc(alias = "ts_parser_reset")] pub fn reset(&mut self) { unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } @@ -608,17 +719,18 @@ impl Parser { /// Get the duration in microseconds that parsing is allowed to take. /// - /// This is set via [set_timeout_micros](Parser::set_timeout_micros). + /// This is set via [`set_timeout_micros`](Parser::set_timeout_micros). #[doc(alias = "ts_parser_timeout_micros")] + #[must_use] pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) } } - /// Set the maximum duration in microseconds that parsing should be allowed to - /// take before halting. + /// Set the maximum duration in microseconds that parsing should be allowed + /// to take before halting. /// /// If parsing takes longer than this, it will halt early, returning `None`. - /// See `parse` for more information. + /// See [`parse`](Parser::parse) for more information. #[doc(alias = "ts_parser_set_timeout_micros")] pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) } @@ -626,27 +738,28 @@ impl Parser { /// Set the ranges of text that the parser should include when parsing. /// - /// By default, the parser will always include entire documents. This function - /// allows you to parse only a *portion* of a document but still return a syntax - /// tree whose ranges match up with the document as a whole. You can also pass - /// multiple disjoint ranges. + /// By default, the parser will always include entire documents. This + /// function allows you to parse only a *portion* of a document but + /// still return a syntax tree whose ranges match up with the document + /// as a whole. You can also pass multiple disjoint ranges. 
/// - /// If `ranges` is empty, then the entire document will be parsed. Otherwise, - /// the given ranges must be ordered from earliest to latest in the document, - /// and they must not overlap. That is, the following must hold for all - /// `i` < `length - 1`: + /// If `ranges` is empty, then the entire document will be parsed. + /// Otherwise, the given ranges must be ordered from earliest to latest + /// in the document, and they must not overlap. That is, the following + /// must hold for all `i` < `length - 1`: /// ```text /// ranges[i].end_byte <= ranges[i + 1].start_byte /// ``` - /// If this requirement is not satisfied, method will return IncludedRangesError - /// error with an offset in the passed ranges slice pointing to a first incorrect range. + /// If this requirement is not satisfied, the method will return an + /// [`IncludedRangesError`] with the offset of the first incorrect range + /// in the passed ranges slice. #[doc(alias = "ts_parser_set_included_ranges")] - pub fn set_included_ranges<'a>( - &mut self, - ranges: &'a [Range], - ) -> Result<(), IncludedRangesError> { - let ts_ranges: Vec<ffi::TSRange> = - ranges.iter().cloned().map(|range| range.into()).collect(); + pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), IncludedRangesError> { + let ts_ranges = ranges + .iter() + .copied() + .map(std::convert::Into::into) + .collect::<Vec<_>>(); let result = unsafe { ffi::ts_parser_set_included_ranges( self.0.as_ptr(), @@ -669,23 +782,53 @@ impl Parser { } } + /// Get the ranges of text that the parser will include when parsing. + #[doc(alias = "ts_parser_included_ranges")] + #[must_use] + pub fn included_ranges(&self) -> Vec<Range> { + let mut count = 0u32; + unsafe { + let ptr = + ffi::ts_parser_included_ranges(self.0.as_ptr(), std::ptr::addr_of_mut!(count)); + let ranges = slice::from_raw_parts(ptr, count as usize); + let result = ranges + .iter() + .copied() + .map(std::convert::Into::into) + .collect(); + result + } + } + /// Get the parser's current cancellation flag pointer. + /// + /// # Safety + /// + /// It uses FFI #[doc(alias = "ts_parser_cancellation_flag")] + #[must_use] pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> { - (ffi::ts_parser_cancellation_flag(self.0.as_ptr()) as *const AtomicUsize).as_ref() + ffi::ts_parser_cancellation_flag(self.0.as_ptr()) + .cast::<AtomicUsize>() + .as_ref() } /// Set the parser's current cancellation flag pointer. /// /// If a pointer is assigned, then the parser will periodically read from - /// this pointer during parsing. If it reads a non-zero value, it will halt early, - /// returning `None`. See [parse](Parser::parse) for more information. + /// this pointer during parsing. If it reads a non-zero value, it will halt + /// early, returning `None`. See [`parse`](Parser::parse) for more + /// information. + /// + /// # Safety + /// + /// It uses FFI #[doc(alias = "ts_parser_set_cancellation_flag")] pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { ffi::ts_parser_set_cancellation_flag( self.0.as_ptr(), - flag as *const AtomicUsize as *const usize, + (flag as *const AtomicUsize).cast::<usize>(), ); } else { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); @@ -704,6 +847,3 @@ impl Drop for Parser { impl Tree { /// Get the root node of the syntax tree. 
#[doc(alias = "ts_tree_root_node")] + #[must_use] pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() } @@ -711,6 +855,7 @@ impl Tree { /// Get the root node of the syntax tree, but with its position shifted /// forward by the given offset. #[doc(alias = "ts_tree_root_node_with_offset")] + #[must_use] pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node { Node::new(unsafe { ffi::ts_tree_root_node_with_offset( @@ -724,8 +869,12 @@ impl Tree { /// Get the language that was used to parse the syntax tree. #[doc(alias = "ts_tree_language")] - pub fn language(&self) -> Language { - Language(unsafe { ffi::ts_tree_language(self.0.as_ptr()) }) + #[must_use] + pub fn language(&self) -> LanguageRef { + LanguageRef( + unsafe { ffi::ts_tree_language(self.0.as_ptr()) }, + PhantomData, + ) } /// Edit the syntax tree to keep it in sync with source code that has been @@ -739,56 +888,79 @@ impl Tree { unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) }; } - /// Create a new [TreeCursor] starting from the root of the tree. + /// Create a new [`TreeCursor`] starting from the root of the tree. + #[must_use] pub fn walk(&self) -> TreeCursor { self.root_node().walk() } - /// Compare this old edited syntax tree to a new syntax tree representing the same - /// document, returning a sequence of ranges whose syntactic structure has changed. + /// Compare this old edited syntax tree to a new syntax tree representing + /// the same document, returning a sequence of ranges whose syntactic + /// structure has changed. /// - /// For this to work correctly, this syntax tree must have been edited such that its - /// ranges match up to the new tree. Generally, you'll want to call this method right - /// after calling one of the [Parser::parse] functions. Call it on the old tree that - /// was passed to parse, and pass the new tree that was returned from `parse`. + /// For this to work correctly, this syntax tree must have been edited such + /// that its ranges match up to the new tree. Generally, you'll want to + /// call this method right after calling one of the [`Parser::parse`] + /// functions. Call it on the old tree that was passed to parse, and + /// pass the new tree that was returned from `parse`. #[doc(alias = "ts_tree_get_changed_ranges")] - pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator { + #[must_use] + pub fn changed_ranges(&self, other: &Self) -> impl ExactSizeIterator { let mut count = 0u32; unsafe { let ptr = ffi::ts_tree_get_changed_ranges( self.0.as_ptr(), other.0.as_ptr(), - &mut count as *mut u32, + std::ptr::addr_of_mut!(count), ); - util::CBufferIter::new(ptr, count as usize).map(|r| r.into()) + util::CBufferIter::new(ptr, count as usize).map(std::convert::Into::into) } } /// Get the included ranges that were used to parse the syntax tree. + #[doc(alias = "ts_tree_included_ranges")] + #[must_use] pub fn included_ranges(&self) -> Vec { let mut count = 0u32; unsafe { - let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), &mut count as *mut u32); + let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), std::ptr::addr_of_mut!(count)); let ranges = slice::from_raw_parts(ptr, count as usize); - let result = ranges.iter().copied().map(|range| range.into()).collect(); - (FREE_FN)(ptr as *mut c_void); + let result = ranges + .iter() + .copied() + .map(std::convert::Into::into) + .collect(); + (FREE_FN)(ptr.cast::()); result } } /// Print a graph of the tree to the given file descriptor. 
- /// The graph is formatted in the DOT language. You may want to pipe this graph - /// directly to a `dot(1)` process in order to generate SVG output. - #[cfg(unix)] + /// The graph is formatted in the DOT language. You may want to pipe this + /// graph directly to a `dot(1)` process in order to generate SVG + /// output. #[doc(alias = "ts_tree_print_dot_graph")] - pub fn print_dot_graph(&self, file: &impl AsRawFd) { - let fd = file.as_raw_fd(); - unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } + pub fn print_dot_graph( + &self, + #[cfg(any(unix, target_os = "wasi"))] file: &impl AsRawFd, + #[cfg(windows)] file: &impl AsRawHandle, + ) { + #[cfg(any(unix, target_os = "wasi"))] + { + let fd = file.as_raw_fd(); + unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } + } + + #[cfg(windows)] + { + let handle = file.as_raw_handle(); + unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), handle as i32) } + } } } impl fmt::Debug for Tree { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{{Tree {:?}}}", self.root_node()) } } @@ -800,18 +972,14 @@ impl Drop for Tree { } impl Clone for Tree { - fn clone(&self) -> Tree { - unsafe { Tree(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } + fn clone(&self) -> Self { + unsafe { Self(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } } } impl<'tree> Node<'tree> { fn new(node: ffi::TSNode) -> Option { - if node.id.is_null() { - None - } else { - Some(Node(node, PhantomData)) - } + (!node.id.is_null()).then_some(Node(node, PhantomData)) } /// Get a numeric id for this node that is unique. @@ -820,97 +988,144 @@ impl<'tree> Node<'tree> { /// a new tree is created based on an older tree, and a node from the old /// tree is reused in the process, then that node will have the same id in /// both trees. + #[must_use] pub fn id(&self) -> usize { self.0.id as usize } /// Get this node's type as a numerical id. #[doc(alias = "ts_node_symbol")] + #[must_use] pub fn kind_id(&self) -> u16 { unsafe { ffi::ts_node_symbol(self.0) } } + /// Get the node's type as a numerical id as it appears in the grammar + /// ignoring aliases. + #[doc(alias = "ts_node_grammar_symbol")] + #[must_use] + pub fn grammar_id(&self) -> u16 { + unsafe { ffi::ts_node_grammar_symbol(self.0) } + } + /// Get this node's type as a string. #[doc(alias = "ts_node_type")] + #[must_use] pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } .to_str() .unwrap() } - /// Get the [Language] that was used to parse this node's syntax tree. - #[doc(alias = "ts_tree_language")] - pub fn language(&self) -> Language { - Language(unsafe { ffi::ts_tree_language(self.0.tree) }) + /// Get this node's symbol name as it appears in the grammar ignoring + /// aliases as a string. + #[doc(alias = "ts_node_grammar_type")] + #[must_use] + pub fn grammar_name(&self) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_node_grammar_type(self.0)) } + .to_str() + .unwrap() + } + + /// Get the [`Language`] that was used to parse this node's syntax tree. + #[doc(alias = "ts_node_language")] + #[must_use] + pub fn language(&self) -> LanguageRef { + LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData) } /// Check if this node is *named*. /// - /// Named nodes correspond to named rules in the grammar, whereas *anonymous* nodes - /// correspond to string literals in the grammar. 
+ /// Named nodes correspond to named rules in the grammar, whereas + /// *anonymous* nodes correspond to string literals in the grammar. #[doc(alias = "ts_node_is_named")] + #[must_use] pub fn is_named(&self) -> bool { unsafe { ffi::ts_node_is_named(self.0) } } /// Check if this node is *extra*. /// - /// Extra nodes represent things like comments, which are not required the grammar, - /// but can appear anywhere. + /// Extra nodes represent things like comments, which are not required by the + /// grammar, but can appear anywhere. #[doc(alias = "ts_node_is_extra")] + #[must_use] pub fn is_extra(&self) -> bool { unsafe { ffi::ts_node_is_extra(self.0) } } /// Check if this node has been edited. #[doc(alias = "ts_node_has_changes")] + #[must_use] pub fn has_changes(&self) -> bool { unsafe { ffi::ts_node_has_changes(self.0) } } - /// Check if this node represents a syntax error or contains any syntax errors anywhere - /// within it. + /// Check if this node represents a syntax error or contains any syntax + /// errors anywhere within it. #[doc(alias = "ts_node_has_error")] + #[must_use] pub fn has_error(&self) -> bool { unsafe { ffi::ts_node_has_error(self.0) } } /// Check if this node represents a syntax error. /// - /// Syntax errors represent parts of the code that could not be incorporated into a - /// valid syntax tree. + /// Syntax errors represent parts of the code that could not be incorporated + /// into a valid syntax tree. + #[doc(alias = "ts_node_is_error")] + #[must_use] pub fn is_error(&self) -> bool { - self.kind_id() == u16::MAX + unsafe { ffi::ts_node_is_error(self.0) } + } + + /// Get this node's parse state. + #[doc(alias = "ts_node_parse_state")] + #[must_use] + pub fn parse_state(&self) -> u16 { + unsafe { ffi::ts_node_parse_state(self.0) } + } + + /// Get the parse state after this node. + #[doc(alias = "ts_node_next_parse_state")] + #[must_use] + pub fn next_parse_state(&self) -> u16 { + unsafe { ffi::ts_node_next_parse_state(self.0) } } /// Check if this node is *missing*. /// - /// Missing nodes are inserted by the parser in order to recover from certain kinds of - /// syntax errors. + /// Missing nodes are inserted by the parser in order to recover from + /// certain kinds of syntax errors. #[doc(alias = "ts_node_is_missing")] + #[must_use] pub fn is_missing(&self) -> bool { unsafe { ffi::ts_node_is_missing(self.0) } } /// Get the byte offset where this node starts. #[doc(alias = "ts_node_start_byte")] + #[must_use] pub fn start_byte(&self) -> usize { unsafe { ffi::ts_node_start_byte(self.0) as usize } } /// Get the byte offset where this node ends. #[doc(alias = "ts_node_end_byte")] + #[must_use] pub fn end_byte(&self) -> usize { unsafe { ffi::ts_node_end_byte(self.0) as usize } } /// Get the byte range of source code that this node represents. + #[must_use] pub fn byte_range(&self) -> std::ops::Range<usize> { self.start_byte()..self.end_byte() } - /// Get the range of source code that this node represents, both in terms of raw bytes - /// and of row/column coordinates. + /// Get the range of source code that this node represents, both in terms of + /// raw bytes and of row/column coordinates. + #[must_use] pub fn range(&self) -> Range { Range { start_byte: self.start_byte(), @@ -922,6 +1137,7 @@ impl<'tree> Node<'tree> { /// Get this node's start position in terms of rows and columns. 
#[doc(alias = "ts_node_start_point")] + #[must_use] pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; result.into() @@ -929,6 +1145,7 @@ impl<'tree> Node<'tree> { /// Get this node's end position in terms of rows and columns. #[doc(alias = "ts_node_end_point")] + #[must_use] pub fn end_position(&self) -> Point { let result = unsafe { ffi::ts_node_end_point(self.0) }; result.into() @@ -937,35 +1154,39 @@ impl<'tree> Node<'tree> { /// Get the node's child at the given index, where zero represents the first /// child. /// - /// This method is fairly fast, but its cost is technically log(i), so you - /// if you might be iterating over a long list of children, you should use - /// [Node::children] instead. + /// This method is fairly fast, but its cost is technically log(i), so if + /// you might be iterating over a long list of children, you should use + /// [`Node::children`] instead. #[doc(alias = "ts_node_child")] + #[must_use] pub fn child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } /// Get this node's number of children. #[doc(alias = "ts_node_child_count")] + #[must_use] pub fn child_count(&self) -> usize { unsafe { ffi::ts_node_child_count(self.0) as usize } } /// Get this node's *named* child at the given index. /// - /// See also [Node::is_named]. - /// This method is fairly fast, but its cost is technically log(i), so you - /// if you might be iterating over a long list of children, you should use - /// [Node::named_children] instead. + /// See also [`Node::is_named`]. + /// This method is fairly fast, but its cost is technically log(i), so if + /// you might be iterating over a long list of children, you should use + /// [`Node::named_children`] instead. #[doc(alias = "ts_node_named_child")] - pub fn named_child<'a>(&'a self, i: usize) -> Option { + #[must_use] + pub fn named_child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } /// Get this node's number of *named* children. /// - /// See also [Node::is_named]. + /// See also [`Node::is_named`]. #[doc(alias = "ts_node_named_child_count")] + #[must_use] pub fn named_child_count(&self) -> usize { unsafe { ffi::ts_node_named_child_count(self.0) as usize } } @@ -973,14 +1194,15 @@ impl<'tree> Node<'tree> { /// Get the first child with the given field name. /// /// If multiple children may have the same field name, access them using - /// [children_by_field_name](Node::children_by_field_name) + /// [`children_by_field_name`](Node::children_by_field_name) #[doc(alias = "ts_node_child_by_field_name")] + #[must_use] pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); Self::new(unsafe { ffi::ts_node_child_by_field_name( self.0, - field_name.as_ptr() as *const c_char, + field_name.as_ptr().cast::(), field_name.len() as u32, ) }) @@ -988,42 +1210,40 @@ impl<'tree> Node<'tree> { /// Get this node's child with the given numerical field id. /// - /// See also [child_by_field_name](Node::child_by_field_name). You can convert a field name to - /// an id using [Language::field_id_for_name]. + /// See also [`child_by_field_name`](Node::child_by_field_name). You can + /// convert a field name to an id using [`Language::field_id_for_name`]. 
#[doc(alias = "ts_node_child_by_field_id")] + #[must_use] pub fn child_by_field_id(&self, field_id: u16) -> Option { Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) }) } /// Get the field name of this node's child at the given index. #[doc(alias = "ts_node_field_name_for_child")] + #[must_use] pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> { unsafe { let ptr = ffi::ts_node_field_name_for_child(self.0, child_index); - if ptr.is_null() { - None - } else { - Some(CStr::from_ptr(ptr).to_str().unwrap()) - } + (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) } } /// Iterate over this node's children. /// - /// A [TreeCursor] is used to retrieve the children efficiently. Obtain - /// a [TreeCursor] by calling [Tree::walk] or [Node::walk]. To avoid unnecessary - /// allocations, you should reuse the same cursor for subsequent calls to - /// this method. + /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain + /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid + /// unnecessary allocations, you should reuse the same cursor for + /// subsequent calls to this method. /// - /// If you're walking the tree recursively, you may want to use the `TreeCursor` - /// APIs directly instead. - pub fn children<'a>( + /// If you're walking the tree recursively, you may want to use the + /// [`TreeCursor`] APIs directly instead. + pub fn children<'cursor>( &self, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl ExactSizeIterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl ExactSizeIterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); - (0..self.child_count()).into_iter().map(move |_| { + (0..self.child_count()).map(move |_| { let result = cursor.node(); cursor.goto_next_sibling(); result @@ -1032,14 +1252,14 @@ impl<'tree> Node<'tree> { /// Iterate over this node's named children. /// - /// See also [Node::children]. - pub fn named_children<'a>( + /// See also [`Node::children`]. + pub fn named_children<'cursor>( &self, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl ExactSizeIterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl ExactSizeIterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); - (0..self.named_child_count()).into_iter().map(move |_| { + (0..self.named_child_count()).map(move |_| { while !cursor.node().is_named() { if !cursor.goto_next_sibling() { break; @@ -1053,29 +1273,48 @@ impl<'tree> Node<'tree> { /// Iterate over this node's children with a given field name. /// - /// See also [Node::children]. - pub fn children_by_field_name<'a>( + /// See also [`Node::children`]. + pub fn children_by_field_name<'cursor>( &self, field_name: &str, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl Iterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl Iterator> + 'cursor { let field_id = self.language().field_id_for_name(field_name); - self.children_by_field_id(field_id.unwrap_or(0), cursor) + let mut done = field_id.is_none(); + if !done { + cursor.reset(*self); + cursor.goto_first_child(); + } + iter::from_fn(move || { + if !done { + while cursor.field_id() != field_id { + if !cursor.goto_next_sibling() { + return None; + } + } + let result = cursor.node(); + if !cursor.goto_next_sibling() { + done = true; + } + return Some(result); + } + None + }) } /// Iterate over this node's children with a given field id. /// - /// See also [Node::children_by_field_name]. 
- pub fn children_by_field_id<'a>( + /// See also [`Node::children_by_field_name`]. + pub fn children_by_field_id<'cursor>( &self, - field_id: u16, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl Iterator> + 'a { + field_id: FieldId, + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl Iterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); let mut done = false; iter::from_fn(move || { - while !done { + if !done { while cursor.field_id() != Some(field_id) { if !cursor.goto_next_sibling() { return None; @@ -1092,37 +1331,59 @@ impl<'tree> Node<'tree> { } /// Get this node's immediate parent. + /// Prefer [`child_containing_descendant`](Node::child_containing_descendant) + /// for iterating over this node's ancestors. #[doc(alias = "ts_node_parent")] + #[must_use] pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } + /// Get this node's child that contains `descendant`. + #[doc(alias = "ts_node_child_containing_descendant")] + #[must_use] + pub fn child_containing_descendant(&self, descendant: Self) -> Option { + Self::new(unsafe { ffi::ts_node_child_containing_descendant(self.0, descendant.0) }) + } + /// Get this node's next sibling. #[doc(alias = "ts_node_next_sibling")] + #[must_use] pub fn next_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) } /// Get this node's previous sibling. #[doc(alias = "ts_node_prev_sibling")] + #[must_use] pub fn prev_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) } /// Get this node's next named sibling. #[doc(alias = "ts_node_next_named_sibling")] + #[must_use] pub fn next_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) } /// Get this node's previous named sibling. #[doc(alias = "ts_node_prev_named_sibling")] + #[must_use] pub fn prev_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } + /// Get the node's number of descendants, including one for the node itself. + #[doc(alias = "ts_node_descendant_count")] + #[must_use] + pub fn descendant_count(&self) -> usize { + unsafe { ffi::ts_node_descendant_count(self.0) as usize } + } + /// Get the smallest node within this node that spans the given range. #[doc(alias = "ts_node_descendant_for_byte_range")] + #[must_use] pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option { Self::new(unsafe { ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32) @@ -1131,6 +1392,7 @@ impl<'tree> Node<'tree> { /// Get the smallest named node within this node that spans the given range. #[doc(alias = "ts_node_named_descendant_for_byte_range")] + #[must_use] pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32) @@ -1139,6 +1401,7 @@ impl<'tree> Node<'tree> { /// Get the smallest node within this node that spans the given range. #[doc(alias = "ts_node_descendant_for_point_range")] + #[must_use] pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option { Self::new(unsafe { ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into()) @@ -1147,6 +1410,7 @@ impl<'tree> Node<'tree> { /// Get the smallest named node within this node that spans the given range. 
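`named_descendant_for_byte_range` plus `parent` is the usual recipe for an editor-style "expand selection" command; a minimal sketch:

```rust
use tree_sitter::Node;

/// Find the smallest named node covering `start..end`, then climb until
/// the node is strictly larger than the selection.
fn expand_selection(root: Node, start: usize, end: usize) -> Option<Node> {
    let mut node = root.named_descendant_for_byte_range(start, end)?;
    while node.start_byte() == start && node.end_byte() == end {
        node = node.parent()?;
    }
    Some(node)
}
```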
#[doc(alias = "ts_node_named_descendant_for_point_range")] + #[must_use] pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option { Self::new(unsafe { ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into()) @@ -1154,13 +1418,14 @@ impl<'tree> Node<'tree> { } #[doc(alias = "ts_node_string")] + #[must_use] pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) } .to_str() .unwrap() .to_string(); - unsafe { (FREE_FN)(c_string as *mut c_void) }; + unsafe { (FREE_FN)(c_string.cast::()) }; result } @@ -1168,39 +1433,41 @@ impl<'tree> Node<'tree> { str::from_utf8(&source[self.start_byte()..self.end_byte()]) } + #[must_use] pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { - &source.as_ref()[self.start_byte()..self.end_byte()] + &source[self.start_byte()..self.end_byte()] } - /// Create a new [TreeCursor] starting from this node. + /// Create a new [`TreeCursor`] starting from this node. #[doc(alias = "ts_tree_cursor_new")] + #[must_use] pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } /// Edit this node to keep it in-sync with source code that has been edited. /// - /// This function is only rarely needed. When you edit a syntax tree with the - /// [Tree::edit] method, all of the nodes that you retrieve from the tree - /// afterward will already reflect the edit. You only need to use [Node::edit] - /// when you have a specific [Node] instance that you want to keep and continue - /// to use after an edit. + /// This function is only rarely needed. When you edit a syntax tree with + /// the [`Tree::edit`] method, all of the nodes that you retrieve from + /// the tree afterward will already reflect the edit. You only need to + /// use [`Node::edit`] when you have a specific [`Node`] instance that + /// you want to keep and continue to use after an edit. #[doc(alias = "ts_node_edit")] pub fn edit(&mut self, edit: &InputEdit) { let edit = edit.into(); - unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } + unsafe { ffi::ts_node_edit(std::ptr::addr_of_mut!(self.0), &edit) } } } -impl<'a> PartialEq for Node<'a> { +impl PartialEq for Node<'_> { fn eq(&self, other: &Self) -> bool { self.0.id == other.0.id } } -impl<'a> Eq for Node<'a> {} +impl Eq for Node<'_> {} -impl<'a> hash::Hash for Node<'a> { +impl hash::Hash for Node<'_> { fn hash(&self, state: &mut H) { self.0.id.hash(state); self.0.context[0].hash(state); @@ -1210,8 +1477,8 @@ impl<'a> hash::Hash for Node<'a> { } } -impl<'a> fmt::Debug for Node<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { +impl fmt::Debug for Node<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "{{Node {} {} - {}}}", @@ -1222,10 +1489,24 @@ impl<'a> fmt::Debug for Node<'a> { } } -impl<'a> TreeCursor<'a> { - /// Get the tree cursor's current [Node]. +impl fmt::Display for Node<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let sexp = self.to_sexp(); + if sexp.is_empty() { + write!(f, "") + } else if !f.alternate() { + write!(f, "{sexp}") + } else { + write!(f, "{}", format_sexp(&sexp, f.width().unwrap_or(0))) + } + } +} + +impl<'cursor> TreeCursor<'cursor> { + /// Get the tree cursor's current [`Node`]. 
#[doc(alias = "ts_tree_cursor_current_node")] - pub fn node(&self) -> Node<'a> { + #[must_use] + pub fn node(&self) -> Node<'cursor> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, @@ -1234,110 +1515,239 @@ impl<'a> TreeCursor<'a> { /// Get the numerical field id of this tree cursor's current node. /// - /// See also [field_name](TreeCursor::field_name). + /// See also [`field_name`](TreeCursor::field_name). #[doc(alias = "ts_tree_cursor_current_field_id")] - pub fn field_id(&self) -> Option { - unsafe { - let id = ffi::ts_tree_cursor_current_field_id(&self.0); - if id == 0 { - None - } else { - Some(id) - } - } + #[must_use] + pub fn field_id(&self) -> Option { + let id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) }; + FieldId::new(id) } /// Get the field name of this tree cursor's current node. #[doc(alias = "ts_tree_cursor_current_field_name")] + #[must_use] pub fn field_name(&self) -> Option<&'static str> { unsafe { let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); - if ptr.is_null() { - None - } else { - Some(CStr::from_ptr(ptr).to_str().unwrap()) - } + (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) } } + /// Get the numerical field id of this tree cursor's current node. + /// + /// See also [`field_name`](TreeCursor::field_name). + #[doc(alias = "ts_tree_cursor_current_depth")] + #[must_use] + pub fn depth(&self) -> u32 { + unsafe { ffi::ts_tree_cursor_current_depth(&self.0) } + } + + /// Get the index of the cursor's current node out of all of the + /// descendants of the original node that the cursor was constructed with + #[doc(alias = "ts_tree_cursor_current_descendant_index")] + #[must_use] + pub fn descendant_index(&self) -> usize { + unsafe { ffi::ts_tree_cursor_current_descendant_index(&self.0) as usize } + } + /// Move this cursor to the first child of its current node. /// - /// This returns `true` if the cursor successfully moved, and returns `false` - /// if there were no children. + /// This returns `true` if the cursor successfully moved, and returns + /// `false` if there were no children. #[doc(alias = "ts_tree_cursor_goto_first_child")] pub fn goto_first_child(&mut self) -> bool { - return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; + unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) } + } + + /// Move this cursor to the last child of its current node. + /// + /// This returns `true` if the cursor successfully moved, and returns + /// `false` if there were no children. + /// + /// Note that this function may be slower than + /// [`goto_first_child`](TreeCursor::goto_first_child) because it needs to + /// iterate through all the children to compute the child's position. + #[doc(alias = "ts_tree_cursor_goto_last_child")] + pub fn goto_last_child(&mut self) -> bool { + unsafe { ffi::ts_tree_cursor_goto_last_child(&mut self.0) } } /// Move this cursor to the parent of its current node. /// - /// This returns `true` if the cursor successfully moved, and returns `false` - /// if there was no parent node (the cursor was already on the root node). + /// This returns `true` if the cursor successfully moved, and returns + /// `false` if there was no parent node (the cursor was already on the + /// root node). #[doc(alias = "ts_tree_cursor_goto_parent")] pub fn goto_parent(&mut self) -> bool { - return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; + unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) } } /// Move this cursor to the next sibling of its current node. 
     ///
-    /// This returns `true` if the cursor successfully moved, and returns `false`
-    /// if there was no next sibling node.
+    /// This returns `true` if the cursor successfully moved, and returns
+    /// `false` if there was no next sibling node.
     #[doc(alias = "ts_tree_cursor_goto_next_sibling")]
     pub fn goto_next_sibling(&mut self) -> bool {
-        return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) };
+        unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }
+    }
+
+    /// Move the cursor to the node that is the nth descendant of
+    /// the original node that the cursor was constructed with, where
+    /// zero represents the original node itself.
+    #[doc(alias = "ts_tree_cursor_goto_descendant")]
+    pub fn goto_descendant(&mut self, descendant_index: usize) {
+        unsafe { ffi::ts_tree_cursor_goto_descendant(&mut self.0, descendant_index as u32) }
     }
 
-    /// Move this cursor to the first child of its current node that extends beyond
-    /// the given byte offset.
+    /// Move this cursor to the previous sibling of its current node.
     ///
-    /// This returns the index of the child node if one was found, and returns `None`
-    /// if no such child was found.
+    /// This returns `true` if the cursor successfully moved, and returns
+    /// `false` if there was no previous sibling node.
+    ///
+    /// Note that this function may be slower than
+    /// [`goto_next_sibling`](TreeCursor::goto_next_sibling) due to how node
+    /// positions are stored. In the worst case, this will need to iterate
+    /// through all the children up to the previous sibling node to recalculate
+    /// its position.
+    #[doc(alias = "ts_tree_cursor_goto_previous_sibling")]
+    pub fn goto_previous_sibling(&mut self) -> bool {
+        unsafe { ffi::ts_tree_cursor_goto_previous_sibling(&mut self.0) }
+    }
+
+    /// Move this cursor to the first child of its current node that extends
+    /// beyond the given byte offset.
+    ///
+    /// This returns the index of the child node if one was found, and returns
+    /// `None` if no such child was found.
     #[doc(alias = "ts_tree_cursor_goto_first_child_for_byte")]
     pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option<usize> {
         let result =
             unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) };
-        if result < 0 {
-            None
-        } else {
-            Some(result as usize)
-        }
+        (result >= 0).then_some(result as usize)
     }
 
-    /// Move this cursor to the first child of its current node that extends beyond
-    /// the given byte offset.
+    /// Move this cursor to the first child of its current node that extends
+    /// beyond the given point.
     ///
-    /// This returns the index of the child node if one was found, and returns `None`
-    /// if no such child was found.
+    /// This returns the index of the child node if one was found, and returns
+    /// `None` if no such child was found.
     #[doc(alias = "ts_tree_cursor_goto_first_child_for_point")]
     pub fn goto_first_child_for_point(&mut self, point: Point) -> Option<usize> {
         let result =
             unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) };
-        if result < 0 {
-            None
-        } else {
-            Some(result as usize)
-        }
+        (result >= 0).then_some(result as usize)
     }
 
     /// Re-initialize this tree cursor to start at a different node.
     #[doc(alias = "ts_tree_cursor_reset")]
-    pub fn reset(&mut self, node: Node<'a>) {
+    pub fn reset(&mut self, node: Node<'cursor>) {
         unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
     }
+
+    /// Re-initialize a tree cursor to the same position as another cursor.
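`goto_first_child_for_byte` composes into a quick descent toward the node under a byte offset; each step moves to the first child ending after the offset, so the loop stops at a leaf (or when no child qualifies). A sketch:

```rust
use tree_sitter::TreeCursor;

/// Descend as deep as possible toward `byte`. Afterwards,
/// `cursor.node()` is the smallest node reachable this way.
fn descend_to_byte(cursor: &mut TreeCursor, byte: usize) {
    while cursor.goto_first_child_for_byte(byte).is_some() {}
}
```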
+ /// + /// Unlike [`reset`](TreeCursor::reset), this will not lose parent + /// information and allows reusing already created cursors. + #[doc(alias = "ts_tree_cursor_reset_to")] + pub fn reset_to(&mut self, cursor: &Self) { + unsafe { ffi::ts_tree_cursor_reset_to(&mut self.0, &cursor.0) }; + } } -impl<'a> Clone for TreeCursor<'a> { +impl Clone for TreeCursor<'_> { fn clone(&self) -> Self { TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) } } -impl<'a> Drop for TreeCursor<'a> { +impl Drop for TreeCursor<'_> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } } } +impl LookaheadIterator { + /// Get the current language of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_language")] + #[must_use] + pub fn language(&self) -> LanguageRef<'_> { + LanguageRef( + unsafe { ffi::ts_lookahead_iterator_language(self.0.as_ptr()) }, + PhantomData, + ) + } + + /// Get the current symbol of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_current_symbol")] + #[must_use] + pub fn current_symbol(&self) -> u16 { + unsafe { ffi::ts_lookahead_iterator_current_symbol(self.0.as_ptr()) } + } + + /// Get the current symbol name of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_current_symbol_name")] + #[must_use] + pub fn current_symbol_name(&self) -> &'static str { + unsafe { + CStr::from_ptr(ffi::ts_lookahead_iterator_current_symbol_name( + self.0.as_ptr(), + )) + .to_str() + .unwrap() + } + } + + /// Reset the lookahead iterator. + /// + /// This returns `true` if the language was set successfully and `false` + /// otherwise. + #[doc(alias = "ts_lookahead_iterator_reset")] + pub fn reset(&mut self, language: &Language, state: u16) -> bool { + unsafe { ffi::ts_lookahead_iterator_reset(self.0.as_ptr(), language.0, state) } + } + + /// Reset the lookahead iterator to another state. + /// + /// This returns `true` if the iterator was reset to the given state and + /// `false` otherwise. + #[doc(alias = "ts_lookahead_iterator_reset_state")] + pub fn reset_state(&mut self, state: u16) -> bool { + unsafe { ffi::ts_lookahead_iterator_reset_state(self.0.as_ptr(), state) } + } + + /// Iterate symbol names. + pub fn iter_names(&mut self) -> impl Iterator + '_ { + LookaheadNamesIterator(self) + } +} + +impl Iterator for LookaheadNamesIterator<'_> { + type Item = &'static str; + + #[doc(alias = "ts_lookahead_iterator_next")] + fn next(&mut self) -> Option { + unsafe { ffi::ts_lookahead_iterator_next(self.0 .0.as_ptr()) } + .then(|| self.0.current_symbol_name()) + } +} + +impl Iterator for LookaheadIterator { + type Item = u16; + + #[doc(alias = "ts_lookahead_iterator_next")] + fn next(&mut self) -> Option { + // the first symbol is always `0` so we can safely skip it + unsafe { ffi::ts_lookahead_iterator_next(self.0.as_ptr()) }.then(|| self.current_symbol()) + } +} + +impl Drop for LookaheadIterator { + #[doc(alias = "ts_lookahead_iterator_delete")] + fn drop(&mut self) { + unsafe { ffi::ts_lookahead_iterator_delete(self.0.as_ptr()) } + } +} + impl Query { /// Create a new query from a string containing one or more S-expression /// patterns. @@ -1345,7 +1755,7 @@ impl Query { /// The query is associated with a particular language, and can only be run /// on syntax nodes parsed with that language. References to Queries can be /// shared between multiple threads. 
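`LookaheadIterator` is the basis for "expected token" diagnostics. The sketch below pairs it with `Node::parse_state` from earlier in this section, and assumes the `Language::lookahead_iterator` constructor (the wrapper for `ts_lookahead_iterator_new`, defined in a part of this file not shown here):

```rust
use tree_sitter::{Language, Node};

/// List the symbol names that would have been valid at `node`, e.g. to
/// build an "expected one of ..." message beside a syntax error.
fn expected_symbols(language: &Language, node: Node) -> Vec<&'static str> {
    language
        .lookahead_iterator(node.parse_state())
        .map(|mut lookahead| lookahead.iter_names().collect())
        .unwrap_or_default()
}
```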
- pub fn new(language: Language, source: &str) -> Result { + pub fn new(language: &Language, source: &str) -> Result { let mut error_offset = 0u32; let mut error_type: ffi::TSQueryError = 0; let bytes = source.as_bytes(); @@ -1354,16 +1764,16 @@ impl Query { let ptr = unsafe { ffi::ts_query_new( language.0, - bytes.as_ptr() as *const c_char, + bytes.as_ptr().cast::(), bytes.len() as u32, - &mut error_offset as *mut u32, - &mut error_type as *mut ffi::TSQueryError, + std::ptr::addr_of_mut!(error_offset), + std::ptr::addr_of_mut!(error_type), ) }; // On failure, build an error based on the error code and offset. if ptr.is_null() { - if error_type == ffi::TSQueryError_TSQueryErrorLanguage { + if error_type == ffi::TSQueryErrorLanguage { return Err(QueryError { row: 0, column: 0, @@ -1380,7 +1790,7 @@ impl Query { let mut line_start = 0; let mut row = 0; let mut line_containing_error = None; - for line in source.split("\n") { + for line in source.lines() { let line_end = line_start + line.len() + 1; if line_end > offset { line_containing_error = Some(line); @@ -1395,31 +1805,28 @@ impl Query { let message; match error_type { // Error types that report names - ffi::TSQueryError_TSQueryErrorNodeType - | ffi::TSQueryError_TSQueryErrorField - | ffi::TSQueryError_TSQueryErrorCapture => { + ffi::TSQueryErrorNodeType | ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') - .unwrap_or(source.len()); + .unwrap_or(suffix.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, - ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, - ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, + ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType, + ffi::TSQueryErrorField => QueryErrorKind::Field, + ffi::TSQueryErrorCapture => QueryErrorKind::Capture, _ => unreachable!(), }; } // Error types that report positions _ => { - message = if let Some(line) = line_containing_error { - line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" - } else { - "Unexpected EOF".to_string() - }; + message = line_containing_error.map_or_else( + || "Unexpected EOF".to_string(), + |line| line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^", + ); kind = match error_type { - ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, + ffi::TSQueryErrorStructure => QueryErrorKind::Structure, _ => QueryErrorKind::Syntax, }; } @@ -1429,33 +1836,47 @@ impl Query { row, column, offset, - kind, message, + kind, }); } - let string_count = unsafe { ffi::ts_query_string_count(ptr) }; - let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; - let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; - let mut result = Query { - ptr: unsafe { NonNull::new_unchecked(ptr) }, - capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(pattern_count as usize), - text_predicates: Vec::with_capacity(pattern_count), - property_predicates: Vec::with_capacity(pattern_count), - property_settings: Vec::with_capacity(pattern_count), - general_predicates: Vec::with_capacity(pattern_count), + unsafe { Self::from_raw_parts(ptr, source) } + } + + #[doc(hidden)] + unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { + let ptr = { + struct TSQueryDrop(*mut ffi::TSQuery); + impl Drop for 
TSQueryDrop { + fn drop(&mut self) { + unsafe { ffi::ts_query_delete(self.0) } + } + } + TSQueryDrop(ptr) }; + let string_count = unsafe { ffi::ts_query_string_count(ptr.0) }; + let capture_count = unsafe { ffi::ts_query_capture_count(ptr.0) }; + let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr.0) as usize }; + + let mut capture_names = Vec::with_capacity(capture_count as usize); + let mut capture_quantifiers_vec = Vec::with_capacity(pattern_count as usize); + let mut text_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_settings_vec = Vec::with_capacity(pattern_count); + let mut general_predicates_vec = Vec::with_capacity(pattern_count); + // Build a vector of strings to store the capture names. for i in 0..capture_count { unsafe { let mut length = 0u32; let name = - ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; + ffi::ts_query_capture_name_for_id(ptr.0, i, std::ptr::addr_of_mut!(length)) + .cast::(); let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); - result.capture_names.push(name.to_string()); + capture_names.push(name); } } @@ -1464,11 +1885,11 @@ impl Query { let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); for j in 0..capture_count { unsafe { - let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr.0, i as u32, j); capture_quantifiers.push(quantifier.into()); } } - result.capture_quantifiers.push(capture_quantifiers); + capture_quantifiers_vec.push(capture_quantifiers.into()); } // Build a vector of strings to represent literal values used in predicates. 
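On the caller's side, the structured `QueryError` assembled above maps cleanly onto a one-line message. A sketch; the `function_item`/`identifier` node names assume a Rust-like grammar and are illustrative only:

```rust
use tree_sitter::{Language, Query};

/// Compile a query, flattening the structured error into one line.
fn compile_query(language: &Language) -> Result<Query, String> {
    Query::new(language, "(function_item name: (identifier) @fn.name)").map_err(|e| {
        format!("query error at row {}, column {}: {}", e.row, e.column, e.message)
    })
}
```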
@@ -1476,11 +1897,11 @@ impl Query { .map(|i| unsafe { let mut length = 0u32; let value = - ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) - as *const u8; + ffi::ts_query_string_value_for_id(ptr.0, i, std::ptr::addr_of_mut!(length)) + .cast::(); let value = slice::from_raw_parts(value, length as usize); let value = str::from_utf8_unchecked(value); - value.to_string() + value }) .collect::>(); @@ -1488,49 +1909,51 @@ impl Query { for i in 0..pattern_count { let predicate_steps = unsafe { let mut length = 0u32; - let raw_predicates = - ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); - if length > 0 { - slice::from_raw_parts(raw_predicates, length as usize) - } else { - &[] - } + let raw_predicates = ffi::ts_query_predicates_for_pattern( + ptr.0, + i as u32, + std::ptr::addr_of_mut!(length), + ); + (length > 0) + .then(|| slice::from_raw_parts(raw_predicates, length as usize)) + .unwrap_or_default() }; - let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr.0, i as u32) }; let row = source .char_indices() .take_while(|(i, _)| *i < byte_offset as usize) .filter(|(_, c)| *c == '\n') .count(); - let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; - let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; - let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; + use ffi::TSQueryPredicateStepType as T; + const TYPE_DONE: T = ffi::TSQueryPredicateStepTypeDone; + const TYPE_CAPTURE: T = ffi::TSQueryPredicateStepTypeCapture; + const TYPE_STRING: T = ffi::TSQueryPredicateStepTypeString; let mut text_predicates = Vec::new(); let mut property_predicates = Vec::new(); let mut property_settings = Vec::new(); let mut general_predicates = Vec::new(); - for p in predicate_steps.split(|s| s.type_ == type_done) { + for p in predicate_steps.split(|s| s.type_ == TYPE_DONE) { if p.is_empty() { continue; } - if p[0].type_ != type_string { + if p[0].type_ != TYPE_STRING { return Err(predicate_error( row, format!( "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], + capture_names[p[0].value_id as usize], ), )); } // Build a predicate for each of the known predicate function names. - let operator_name = &string_values[p[0].value_id as usize]; - match operator_name.as_str() { - "eq?" | "not-eq?" => { + let operator_name = string_values[p[0].value_id as usize]; + match operator_name { + "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => { if p.len() != 3 { return Err(predicate_error( row, @@ -1540,64 +1963,78 @@ impl Query { ), )); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } - let is_positive = operator_name == "eq?"; - text_predicates.push(if p[2].type_ == type_capture { - TextPredicate::CaptureEqCapture( + let is_positive = operator_name == "eq?" || operator_name == "any-eq?"; + let match_all = match operator_name { + "eq?" | "not-eq?" => true, + "any-eq?" | "any-not-eq?" 
=> false, + _ => unreachable!(), + }; + text_predicates.push(if p[2].type_ == TYPE_CAPTURE { + TextPredicateCapture::EqCapture( p[1].value_id, p[2].value_id, is_positive, + match_all, ) } else { - TextPredicate::CaptureEqString( + TextPredicateCapture::EqString( p[1].value_id, - string_values[p[2].value_id as usize].clone(), + string_values[p[2].value_id as usize].to_string().into(), is_positive, + match_all, ) }); } - "match?" | "not-match?" => { + "match?" | "not-match?" | "any-match?" | "any-not-match?" => { if p.len() != 3 { return Err(predicate_error(row, format!( "Wrong number of arguments to #match? predicate. Expected 2, got {}.", p.len() - 1 ))); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } - if p[2].type_ == type_capture { + if p[2].type_ == TYPE_CAPTURE { return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", - result.capture_names[p[2].value_id as usize], + capture_names[p[2].value_id as usize], ))); } - let is_positive = operator_name == "match?"; + let is_positive = + operator_name == "match?" || operator_name == "any-match?"; + let match_all = match operator_name { + "match?" | "not-match?" => true, + "any-match?" | "any-not-match?" => false, + _ => unreachable!(), + }; let regex = &string_values[p[2].value_id as usize]; - text_predicates.push(TextPredicate::CaptureMatchString( + text_predicates.push(TextPredicateCapture::MatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { - predicate_error(row, format!("Invalid regex '{}'", regex)) + predicate_error(row, format!("Invalid regex '{regex}'")) })?, is_positive, + match_all, )); } "set!" => property_settings.push(Self::parse_property( row, - &operator_name, - &result.capture_names, + operator_name, + &capture_names, &string_values, &p[1..], )?), @@ -1605,24 +2042,60 @@ impl Query { "is?" | "is-not?" => property_predicates.push(( Self::parse_property( row, - &operator_name, - &result.capture_names, + operator_name, + &capture_names, &string_values, &p[1..], )?, operator_name == "is?", )), + "any-of?" | "not-any-of?" => { + if p.len() < 2 { + return Err(predicate_error(row, format!( + "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.", + p.len() - 1 + ))); + } + if p[1].type_ != TYPE_CAPTURE { + return Err(predicate_error(row, format!( + "First argument to #any-of? predicate must be a capture name. Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + + let is_positive = operator_name == "any-of?"; + let mut values = Vec::new(); + for arg in &p[2..] { + if arg.type_ == TYPE_CAPTURE { + return Err(predicate_error(row, format!( + "Arguments to #any-of? predicate must be literals. Got capture @{}.", + capture_names[arg.value_id as usize], + ))); + } + values.push(string_values[arg.value_id as usize]); + } + text_predicates.push(TextPredicateCapture::AnyString( + p[1].value_id, + values + .iter() + .map(|x| (*x).to_string().into()) + .collect::>() + .into(), + is_positive, + )); + } + _ => general_predicates.push(QueryPredicate { - operator: operator_name.clone().into_boxed_str(), + operator: operator_name.to_string().into(), args: p[1..] 
.iter() .map(|a| { - if a.type_ == type_capture { + if a.type_ == TYPE_CAPTURE { QueryPredicateArg::Capture(a.value_id) } else { QueryPredicateArg::String( - string_values[a.value_id as usize].clone().into_boxed_str(), + string_values[a.value_id as usize].to_string().into(), ) } }) @@ -1631,32 +2104,37 @@ impl Query { } } - result - .text_predicates - .push(text_predicates.into_boxed_slice()); - result - .property_predicates - .push(property_predicates.into_boxed_slice()); - result - .property_settings - .push(property_settings.into_boxed_slice()); - result - .general_predicates - .push(general_predicates.into_boxed_slice()); + text_predicates_vec.push(text_predicates.into()); + property_predicates_vec.push(property_predicates.into()); + property_settings_vec.push(property_settings.into()); + general_predicates_vec.push(general_predicates.into()); } + + let result = Self { + ptr: unsafe { NonNull::new_unchecked(ptr.0) }, + capture_names: capture_names.into(), + capture_quantifiers: capture_quantifiers_vec.into(), + text_predicates: text_predicates_vec.into(), + property_predicates: property_predicates_vec.into(), + property_settings: property_settings_vec.into(), + general_predicates: general_predicates_vec.into(), + }; + + std::mem::forget(ptr); + Ok(result) } - /// Get the byte offset where the given pattern starts in the query's source. + /// Get the byte offset where the given pattern starts in the query's + /// source. #[doc(alias = "ts_query_start_byte_for_pattern")] + #[must_use] pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize { - if pattern_index >= self.text_predicates.len() { - panic!( - "Pattern index is {} but the pattern count is {}", - pattern_index, - self.text_predicates.len(), - ); - } + assert!( + pattern_index < self.text_predicates.len(), + "Pattern index is {pattern_index} but the pattern count is {}", + self.text_predicates.len(), + ); unsafe { ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize } @@ -1664,39 +2142,45 @@ impl Query { /// Get the number of patterns in the query. #[doc(alias = "ts_query_pattern_count")] + #[must_use] pub fn pattern_count(&self) -> usize { unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize } } /// Get the names of the captures used in the query. - pub fn capture_names(&self) -> &[String] { + #[must_use] + pub const fn capture_names(&self) -> &[&str] { &self.capture_names } /// Get the quantifiers of the captures used in the query. - pub fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { + #[must_use] + pub const fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { &self.capture_quantifiers[index] } /// Get the index for a given capture name. + #[must_use] pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names .iter() - .position(|n| n == name) + .position(|n| *n == name) .map(|ix| ix as u32) } /// Get the properties that are checked for the given pattern index. /// /// This includes predicates with the operators `is?` and `is-not?`. - pub fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { + #[must_use] + pub const fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { &self.property_predicates[index] } /// Get the properties that are set for the given pattern index. /// /// This includes predicates with the operator `set!`. 
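Capture metadata drives most downstream consumers (highlighters map capture names to themes, for example). A small sketch; the `@fn.name` capture is whatever the query source happened to define:

```rust
use tree_sitter::Query;

/// List a query's captures and resolve one name to the numeric index
/// that appears in `QueryCapture::index`.
fn describe_captures(query: &Query) {
    for (i, name) in query.capture_names().iter().enumerate() {
        println!("capture {i}: @{name}");
    }
    if let Some(ix) = query.capture_index_for_name("fn.name") {
        println!("@fn.name has capture index {ix}");
    }
}
```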
- pub fn property_settings(&self, index: usize) -> &[QueryProperty] { + #[must_use] + pub const fn property_settings(&self, index: usize) -> &[QueryProperty] { &self.property_settings[index] } @@ -1707,20 +2191,21 @@ impl Query { /// * `eq?` and `not-eq?` /// * `is?` and `is-not?` /// * `set!` - pub fn general_predicates(&self, index: usize) -> &[QueryPredicate] { + #[must_use] + pub const fn general_predicates(&self, index: usize) -> &[QueryPredicate] { &self.general_predicates[index] } /// Disable a certain capture within a query. /// - /// This prevents the capture from being returned in matches, and also avoids any - /// resource usage associated with recording the capture. + /// This prevents the capture from being returned in matches, and also + /// avoids any resource usage associated with recording the capture. #[doc(alias = "ts_query_disable_capture")] pub fn disable_capture(&mut self, name: &str) { unsafe { ffi::ts_query_disable_capture( self.ptr.as_ptr(), - name.as_bytes().as_ptr() as *const c_char, + name.as_bytes().as_ptr().cast::(), name.len() as u32, ); } @@ -1728,8 +2213,8 @@ impl Query { /// Disable a certain pattern within a query. /// - /// This prevents the pattern from matching, and also avoids any resource usage - /// associated with the pattern. + /// This prevents the pattern from matching, and also avoids any resource + /// usage associated with the pattern. #[doc(alias = "ts_query_disable_pattern")] pub fn disable_pattern(&mut self, index: usize) { unsafe { ffi::ts_query_disable_pattern(self.ptr.as_ptr(), index as u32) } @@ -1737,21 +2222,24 @@ impl Query { /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_rooted")] + #[must_use] pub fn is_pattern_rooted(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_non_local")] + #[must_use] pub fn is_pattern_non_local(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } } /// Check if a given step in a query is 'definite'. /// - /// A query step is 'definite' if its parent pattern will be guaranteed to match - /// successfully once it reaches the step. + /// A query step is 'definite' if its parent pattern will be guaranteed to + /// match successfully once it reaches the step. #[doc(alias = "ts_query_is_pattern_guaranteed_at_step")] + #[must_use] pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool { unsafe { ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) @@ -1761,16 +2249,15 @@ impl Query { fn parse_property( row: usize, function_name: &str, - capture_names: &[String], - string_values: &[String], + capture_names: &[&str], + string_values: &[&str], args: &[ffi::TSQueryPredicateStep], ) -> Result { - if args.len() == 0 || args.len() > 3 { + if args.is_empty() || args.len() > 3 { return Err(predicate_error( row, format!( - "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", - function_name, + "Wrong number of arguments to {function_name} predicate. 
Expected 1 to 3, got {}.", args.len(), ), )); @@ -1781,13 +2268,13 @@ impl Query { let mut value = None; for arg in args { - if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { + if arg.type_ == ffi::TSQueryPredicateStepTypeCapture { if capture_id.is_some() { return Err(predicate_error( row, format!( - "Invalid arguments to {} predicate. Unexpected second capture name @{}", - function_name, capture_names[arg.value_id as usize] + "Invalid arguments to {function_name} predicate. Unexpected second capture name @{}", + capture_names[arg.value_id as usize] ), )); } @@ -1795,13 +2282,13 @@ impl Query { } else if key.is_none() { key = Some(&string_values[arg.value_id as usize]); } else if value.is_none() { - value = Some(string_values[arg.value_id as usize].as_str()); + value = Some(string_values[arg.value_id as usize]); } else { return Err(predicate_error( row, format!( - "Invalid arguments to {} predicate. Unexpected third argument @{}", - function_name, string_values[arg.value_id as usize] + "Invalid arguments to {function_name} predicate. Unexpected third argument @{}", + string_values[arg.value_id as usize] ), )); } @@ -1810,36 +2297,42 @@ impl Query { if let Some(key) = key { Ok(QueryProperty::new(key, value, capture_id)) } else { - return Err(predicate_error( + Err(predicate_error( row, - format!( - "Invalid arguments to {} predicate. Missing key argument", - function_name, - ), - )); + format!("Invalid arguments to {function_name} predicate. Missing key argument",), + )) } } } +impl Default for QueryCursor { + fn default() -> Self { + Self::new() + } +} + impl QueryCursor { /// Create a new cursor for executing a given query. /// - /// The cursor stores the state that is needed to iteratively search for matches. + /// The cursor stores the state that is needed to iteratively search for + /// matches. #[doc(alias = "ts_query_cursor_new")] + #[must_use] pub fn new() -> Self { - QueryCursor { + Self { ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }, } } /// Return the maximum number of in-progress matches for this cursor. #[doc(alias = "ts_query_cursor_match_limit")] + #[must_use] pub fn match_limit(&self) -> u32 { unsafe { ffi::ts_query_cursor_match_limit(self.ptr.as_ptr()) } } - /// Set the maximum number of in-progress matches for this cursor. The limit must be > 0 and - /// <= 65536. + /// Set the maximum number of in-progress matches for this cursor. The + /// limit must be > 0 and <= 65536. #[doc(alias = "ts_query_cursor_set_match_limit")] pub fn set_match_limit(&mut self, limit: u32) { unsafe { @@ -1847,61 +2340,65 @@ impl QueryCursor { } } - /// Check if, on its last execution, this cursor exceeded its maximum number of - /// in-progress matches. + /// Check if, on its last execution, this cursor exceeded its maximum number + /// of in-progress matches. #[doc(alias = "ts_query_cursor_did_exceed_match_limit")] + #[must_use] pub fn did_exceed_match_limit(&self) -> bool { unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) } } /// Iterate over all of the matches in the order that they were found. /// - /// Each match contains the index of the pattern that matched, and a list of captures. - /// Because multiple patterns can match the same set of nodes, one match may contain - /// captures that appear *before* some of the captures from a previous match. + /// Each match contains the index of the pattern that matched, and a list of + /// captures. 
Because multiple patterns can match the same set of nodes, + /// one match may contain captures that appear *before* some of the + /// captures from a previous match. #[doc(alias = "ts_query_cursor_exec")] - pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( - &'a mut self, - query: &'a Query, + pub fn matches<'query, 'cursor: 'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( + &'cursor mut self, + query: &'query Query, node: Node<'tree>, text_provider: T, - ) -> QueryMatches<'a, 'tree, T> { + ) -> QueryMatches<'query, 'tree, T, I> { let ptr = self.ptr.as_ptr(); unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; QueryMatches { ptr, query, text_provider, - buffer1: Default::default(), - buffer2: Default::default(), - _tree: PhantomData, + buffer1: Vec::default(), + buffer2: Vec::default(), + _phantom: PhantomData, } } - /// Iterate over all of the individual captures in the order that they appear. + /// Iterate over all of the individual captures in the order that they + /// appear. /// - /// This is useful if you don't care about which pattern matched, and just want a single, - /// ordered sequence of captures. + /// This is useful if you don't care about which pattern matched, and just + /// want a single, ordered sequence of captures. #[doc(alias = "ts_query_cursor_exec")] - pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( - &'a mut self, - query: &'a Query, + pub fn captures<'query, 'cursor: 'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( + &'cursor mut self, + query: &'query Query, node: Node<'tree>, text_provider: T, - ) -> QueryCaptures<'a, 'tree, T> { + ) -> QueryCaptures<'query, 'tree, T, I> { let ptr = self.ptr.as_ptr(); - unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) }; + unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; QueryCaptures { ptr, query, text_provider, - buffer1: Default::default(), - buffer2: Default::default(), - _tree: PhantomData, + buffer1: Vec::default(), + buffer2: Vec::default(), + _phantom: PhantomData, } } - /// Set the range in which the query will be executed, in terms of byte offsets. + /// Set the range in which the query will be executed, in terms of byte + /// offsets. #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) -> &mut Self { unsafe { @@ -1914,7 +2411,8 @@ impl QueryCursor { self } - /// Set the range in which the query will be executed, in terms of rows and columns. + /// Set the range in which the query will be executed, in terms of rows and + /// columns. #[doc(alias = "ts_query_cursor_set_point_range")] pub fn set_point_range(&mut self, range: ops::Range) -> &mut Self { unsafe { @@ -1926,10 +2424,36 @@ impl QueryCursor { } self } + + /// Set the maximum start depth for a query cursor. + /// + /// This prevents cursors from exploring children nodes at a certain depth. + /// Note if a pattern includes many children, then they will still be + /// checked. + /// + /// The zero max start depth value can be used as a special behavior and + /// it helps to destructure a subtree by staying on a node and using + /// captures for interested parts. Note that the zero max start depth + /// only limit a search depth for a pattern's root node but other nodes + /// that are parts of the pattern may be searched at any depth what + /// defined by the pattern structure. + /// + /// Set to `None` to remove the maximum start depth. 
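In practice the match loop looks like the sketch below; passing the source as `&[u8]` uses the `TextProvider` impl for byte slices defined later in this file. Note that `set_max_start_depth(Some(0))` pins pattern *roots* to the given node itself, while nodes deeper inside a pattern are still matched at whatever depth the pattern requires.

```rust
use tree_sitter::{Node, Query, QueryCursor};

/// Collect the source text of every capture in every match under `node`.
fn match_texts<'s>(query: &Query, node: Node, source: &'s [u8]) -> Vec<&'s str> {
    let mut cursor = QueryCursor::new();
    let mut out = Vec::new();
    for m in cursor.matches(query, node, source) {
        for capture in m.captures {
            out.push(capture.node.utf8_text(source).unwrap_or(""));
        }
    }
    out
}
```

Use `captures` instead of `matches` when only a flat, ordered stream of captures is needed and the pattern index does not matter.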
+ #[doc(alias = "ts_query_cursor_set_max_start_depth")] + pub fn set_max_start_depth(&mut self, max_start_depth: Option) -> &mut Self { + unsafe { + ffi::ts_query_cursor_set_max_start_depth( + self.ptr.as_ptr(), + max_start_depth.unwrap_or(u32::MAX), + ); + } + self + } } -impl<'a, 'tree> QueryMatch<'a, 'tree> { - pub fn id(&self) -> u32 { +impl<'tree> QueryMatch<'_, 'tree> { + #[must_use] + pub const fn id(&self) -> u32 { self.id } @@ -1942,116 +2466,153 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> { &self, capture_ix: u32, ) -> impl Iterator> + '_ { - self.captures.iter().filter_map(move |capture| { - if capture.index == capture_ix { - Some(capture.node) - } else { - None - } - }) + self.captures + .iter() + .filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node)) } - fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { + fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { QueryMatch { cursor, id: m.id, pattern_index: m.pattern_index as usize, - captures: if m.capture_count > 0 { - unsafe { + captures: (m.capture_count > 0) + .then(|| unsafe { slice::from_raw_parts( - m.captures as *const QueryCapture<'tree>, + m.captures.cast::>(), m.capture_count as usize, ) - } - } else { - &[] - }, + }) + .unwrap_or_default(), } } - fn satisfies_text_predicates( + fn satisfies_text_predicates>( &self, query: &Query, buffer1: &mut Vec, buffer2: &mut Vec, - text_provider: &mut impl TextProvider<'a>, + text_provider: &mut impl TextProvider, ) -> bool { - fn get_text<'a, 'b: 'a, I: Iterator>( + struct NodeText<'a, T> { buffer: &'a mut Vec, - mut chunks: I, - ) -> &'a [u8] { - let first_chunk = chunks.next().unwrap_or(&[]); - if let Some(next_chunk) = chunks.next() { - buffer.clear(); - buffer.extend_from_slice(first_chunk); - buffer.extend_from_slice(next_chunk); - for chunk in chunks { - buffer.extend_from_slice(chunk); + first_chunk: Option, + } + impl<'a, T: AsRef<[u8]>> NodeText<'a, T> { + fn new(buffer: &'a mut Vec) -> Self { + Self { + buffer, + first_chunk: None, + } + } + + fn get_text(&mut self, chunks: &mut impl Iterator) -> &[u8] { + self.first_chunk = chunks.next(); + if let Some(next_chunk) = chunks.next() { + self.buffer.clear(); + self.buffer + .extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref()); + self.buffer.extend_from_slice(next_chunk.as_ref()); + for chunk in chunks { + self.buffer.extend_from_slice(chunk.as_ref()); + } + self.buffer.as_slice() + } else if let Some(ref first_chunk) = self.first_chunk { + first_chunk.as_ref() + } else { + &[] } - buffer.as_slice() - } else { - first_chunk } } + let mut node_text1 = NodeText::new(buffer1); + let mut node_text2 = NodeText::new(buffer2); + query.text_predicates[self.pattern_index] .iter() .all(|predicate| match predicate { - TextPredicate::CaptureEqCapture(i, j, is_positive) => { - let node1 = self.nodes_for_capture_index(*i).next(); - let node2 = self.nodes_for_capture_index(*j).next(); - match (node1, node2) { - (Some(node1), Some(node2)) => { - let text1 = get_text(buffer1, text_provider.text(node1)); - let text2 = get_text(buffer2, text_provider.text(node2)); - (text1 == text2) == *is_positive + TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => { + let mut nodes_1 = self.nodes_for_capture_index(*i); + let mut nodes_2 = self.nodes_for_capture_index(*j); + while let (Some(node1), Some(node2)) = (nodes_1.next(), nodes_2.next()) { + let mut text1 = text_provider.text(node1); + let mut text2 = text_provider.text(node2); + let text1 = 
node_text1.get_text(&mut text1); + let text2 = node_text2.get_text(&mut text2); + if (text1 == text2) != *is_positive && *match_all_nodes { + return false; + } + if (text1 == text2) == *is_positive && !*match_all_nodes { + return true; + } + } + nodes_1.next().is_none() && nodes_2.next().is_none() + } + TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (text == s.as_bytes()) != *is_positive && *match_all_nodes { + return false; + } + if (text == s.as_bytes()) == *is_positive && !*match_all_nodes { + return true; } - _ => true, } + true } - TextPredicate::CaptureEqString(i, s, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let text = get_text(buffer1, text_provider.text(node)); - (text == s.as_bytes()) == *is_positive + TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (r.is_match(text)) != *is_positive && *match_all_nodes { + return false; + } + if (r.is_match(text)) == *is_positive && !*match_all_nodes { + return true; } - None => true, } + true } - TextPredicate::CaptureMatchString(i, r, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let text = get_text(buffer1, text_provider.text(node)); - r.is_match(text) == *is_positive + TextPredicateCapture::AnyString(i, v, is_positive) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (v.iter().any(|s| text == s.as_bytes())) != *is_positive { + return false; } - None => true, } + true } }) } } impl QueryProperty { + #[must_use] pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self { - QueryProperty { + Self { capture_id, - key: key.to_string().into_boxed_str(), - value: value.map(|s| s.to_string().into_boxed_str()), + key: key.to_string().into(), + value: value.map(|s| s.to_string().into()), } } } -impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { - type Item = QueryMatch<'a, 'tree>; +impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> Iterator + for QueryMatches<'query, 'tree, T, I> +{ + type Item = QueryMatch<'query, 'tree>; fn next(&mut self) -> Option { unsafe { loop { let mut m = MaybeUninit::::uninit(); if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) { - let result = QueryMatch::new(m.assume_init(), self.ptr); + let result = QueryMatch::new(&m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, @@ -2068,8 +2629,10 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { - type Item = (QueryMatch<'a, 'tree>, usize); +impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> Iterator + for QueryCaptures<'query, 'tree, T, I> +{ + type Item = (QueryMatch<'query, 'tree>, usize); fn next(&mut self) -> Option { unsafe { @@ -2079,9 +2642,9 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { if ffi::ts_query_cursor_next_capture( self.ptr, m.as_mut_ptr(), - &mut capture_index as *mut u32, + 
std::ptr::addr_of_mut!(capture_index), ) { - let result = QueryMatch::new(m.assume_init(), self.ptr); + let result = QueryMatch::new(&m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, @@ -2089,9 +2652,8 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { &mut self.text_provider, ) { return Some((result, capture_index as usize)); - } else { - result.remove(); } + result.remove(); } else { return None; } @@ -2100,7 +2662,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { +impl, I: AsRef<[u8]>> QueryMatches<'_, '_, T, I> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { @@ -2116,7 +2678,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { +impl, I: AsRef<[u8]>> QueryCaptures<'_, '_, T, I> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { @@ -2132,7 +2694,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { } } -impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { +impl fmt::Debug for QueryMatch<'_, '_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -2142,19 +2704,20 @@ impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { } } -impl<'a, F, I> TextProvider<'a> for F +impl TextProvider for F where - F: FnMut(Node) -> I, - I: Iterator + 'a, + F: FnMut(Node) -> R, + R: Iterator, + I: AsRef<[u8]>, { - type I = I; + type I = R; fn text(&mut self, node: Node) -> Self::I { (self)(node) } } -impl<'a> TextProvider<'a> for &'a [u8] { +impl<'a> TextProvider<&'a [u8]> for &'a [u8] { type I = iter::Once<&'a [u8]>; fn text(&mut self, node: Node) -> Self::I { @@ -2181,22 +2744,23 @@ impl Drop for QueryCursor { } impl Point { - pub fn new(row: usize, column: usize) -> Self { - Point { row, column } + #[must_use] + pub const fn new(row: usize, column: usize) -> Self { + Self { row, column } } } impl fmt::Display for Point { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "({}, {})", self.row, self.column) } } -impl Into for Point { - fn into(self) -> ffi::TSPoint { - ffi::TSPoint { - row: self.row as u32, - column: self.column as u32, +impl From for ffi::TSPoint { + fn from(val: Point) -> Self { + Self { + row: val.row as u32, + column: val.column as u32, } } } @@ -2210,13 +2774,13 @@ impl From for Point { } } -impl Into for Range { - fn into(self) -> ffi::TSRange { - ffi::TSRange { - start_byte: self.start_byte as u32, - end_byte: self.end_byte as u32, - start_point: self.start_point.into(), - end_point: self.end_point.into(), +impl From for ffi::TSRange { + fn from(val: Range) -> Self { + Self { + start_byte: val.start_byte as u32, + end_byte: val.end_byte as u32, + start_point: val.start_point.into(), + end_point: val.end_point.into(), } } } @@ -2232,21 +2796,22 @@ impl From for Range { } } -impl<'a> Into for &'a InputEdit { - fn into(self) -> ffi::TSInputEdit { - ffi::TSInputEdit { - start_byte: self.start_byte as u32, - old_end_byte: self.old_end_byte as u32, - new_end_byte: self.new_end_byte as u32, - start_point: self.start_position.into(), - old_end_point: self.old_end_position.into(), - new_end_point: self.new_end_position.into(), +impl From<&'_ InputEdit> for 
ffi::TSInputEdit { + fn from(val: &'_ InputEdit) -> Self { + Self { + start_byte: val.start_byte as u32, + old_end_byte: val.old_end_byte as u32, + new_end_byte: val.new_end_byte as u32, + start_point: val.start_position.into(), + old_end_point: val.old_end_position.into(), + new_end_point: val.new_end_position.into(), } } } impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { + #[must_use] + pub const fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, @@ -2291,7 +2856,8 @@ impl<'a> Iterator for LossyUtf8<'a> { } } -fn predicate_error(row: usize, message: String) -> QueryError { +#[must_use] +const fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, row, @@ -2328,7 +2894,9 @@ impl fmt::Display for QueryError { QueryErrorKind::Syntax => "Invalid syntax:\n", QueryErrorKind::Language => "", }; - if msg.len() > 0 { + if msg.is_empty() { + write!(f, "{}", self.message) + } else { write!( f, "Query error at {}:{}. {}{}", @@ -2337,18 +2905,131 @@ impl fmt::Display for QueryError { msg, self.message ) - } else { - write!(f, "{}", self.message) } } } +#[doc(hidden)] +#[must_use] +pub fn format_sexp(sexp: &str, initial_indent_level: usize) -> String { + let mut indent_level = initial_indent_level; + let mut formatted = String::new(); + let mut has_field = false; + + let mut c_iter = sexp.chars().peekable(); + let mut s = String::with_capacity(sexp.len()); + let mut quote = '\0'; + let mut saw_paren = false; + let mut did_last = false; + + let mut fetch_next_str = |next: &mut String| { + next.clear(); + while let Some(c) = c_iter.next() { + if c == '\'' || c == '"' { + quote = c; + } else if c == ' ' || (c == ')' && quote != '\0') { + if let Some(next_c) = c_iter.peek() { + if *next_c == quote { + next.push(c); + next.push(*next_c); + c_iter.next(); + quote = '\0'; + continue; + } + } + break; + } + if c == ')' { + saw_paren = true; + break; + } + next.push(c); + } + + // at the end + if c_iter.peek().is_none() && next.is_empty() { + if saw_paren { + // but did we see a ) before ending? + saw_paren = false; + return Some(()); + } + if !did_last { + // but did we account for the end empty string as if we're splitting? 
+ did_last = true; + return Some(()); + } + return None; + } + Some(()) + }; + + while fetch_next_str(&mut s).is_some() { + if s.is_empty() && indent_level > 0 { + // ")" + indent_level -= 1; + write!(formatted, ")").unwrap(); + } else if s.starts_with('(') { + if has_field { + has_field = false; + } else { + if indent_level > 0 { + writeln!(formatted).unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + } + indent_level += 1; + } + + // "(node_name" + write!(formatted, "{s}").unwrap(); + + // "(MISSING node_name" or "(UNEXPECTED 'x'" + if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") { + fetch_next_str(&mut s).unwrap(); + if s.is_empty() { + while indent_level > 0 { + indent_level -= 1; + write!(formatted, ")").unwrap(); + } + } else { + write!(formatted, " {s}").unwrap(); + } + } + } else if s.ends_with(':') { + // "field:" + writeln!(formatted).unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + write!(formatted, "{s} ").unwrap(); + has_field = true; + indent_level += 1; + } + } + + formatted +} + +pub fn wasm_stdlib_symbols() -> impl Iterator<Item = &'static str> { + const WASM_STDLIB_SYMBOLS: &str = include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt")); + + WASM_STDLIB_SYMBOLS + .lines() + .map(|s| s.trim_matches(|c| c == '"' || c == ',')) +} + extern "C" { fn free(ptr: *mut c_void); } static mut FREE_FN: unsafe extern "C" fn(ptr: *mut c_void) = free; +/// Sets the memory allocation functions that the core library should use. +/// +/// # Safety +/// +/// This function uses FFI and mutates a static global. #[doc(alias = "ts_set_allocator")] pub unsafe fn set_allocator( new_malloc: Option<unsafe extern "C" fn(usize) -> *mut c_void>, @@ -2365,12 +3046,28 @@ impl error::Error for LanguageError {} impl error::Error for QueryError {} unsafe impl Send for Language {} -unsafe impl Send for Parser {} -unsafe impl Send for Query {} -unsafe impl Send for QueryCursor {} -unsafe impl Send for Tree {} unsafe impl Sync for Language {} + +unsafe impl Send for Node<'_> {} +unsafe impl Sync for Node<'_> {} + +unsafe impl Send for LookaheadIterator {} +unsafe impl Sync for LookaheadIterator {} + +unsafe impl Send for LookaheadNamesIterator<'_> {} +unsafe impl Sync for LookaheadNamesIterator<'_> {} + +unsafe impl Send for Parser {} unsafe impl Sync for Parser {} + +unsafe impl Send for Query {} unsafe impl Sync for Query {} + +unsafe impl Send for QueryCursor {} unsafe impl Sync for QueryCursor {} + +unsafe impl Send for Tree {} unsafe impl Sync for Tree {} + +unsafe impl Send for TreeCursor<'_> {} +unsafe impl Sync for TreeCursor<'_> {}
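The `TextProvider` rework above drops the lifetime parameter in favor of a chunk type `I: AsRef<[u8]>`, so a provider may now yield owned buffers as well as borrowed slices (this is what the new `text_provider_test.rs` in this patch exercises). A minimal sketch of the resulting API, assuming the 0.22 crate and a hypothetical `tree_sitter_rust` grammar crate:

```rust
use tree_sitter::{Node, Parser, Query, QueryCursor};

fn main() {
    let mut parser = Parser::new();
    let language = tree_sitter_rust::language(); // hypothetical grammar crate
    parser.set_language(&language).unwrap();

    let source = "fn main() {}";
    let tree = parser.parse(source, None).unwrap();
    let query = Query::new(&language, "(function_item name: (identifier) @fn)").unwrap();

    // `&[u8]` still works as a provider, but thanks to the blanket impl shown
    // in the hunk above, a closure returning owned `Vec<u8>` chunks does too.
    let provider = |node: Node| std::iter::once(source.as_bytes()[node.byte_range()].to_vec());

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&query, tree.root_node(), provider) {
        for capture in m.captures {
            println!("{:?}", capture.node.utf8_text(source.as_bytes()));
        }
    }
}
```

Passing `source.as_bytes()` directly remains the common case; the generic parameter only changes what custom providers are allowed to return.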
diff --git a/lib/binding_rust/util.rs b/lib/binding_rust/util.rs index 5eda71f..89970e9 100644 --- a/lib/binding_rust/util.rs +++ b/lib/binding_rust/util.rs @@ -1,6 +1,7 @@ -use super::FREE_FN; use std::os::raw::c_void; +use super::FREE_FN; + /// A raw pointer and a length, exposed as an iterator. pub struct CBufferIter<T> { ptr: *mut T, @@ -9,7 +10,7 @@ } impl<T> CBufferIter<T> { - pub unsafe fn new(ptr: *mut T, count: usize) -> Self { + pub const unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 } } } @@ -23,7 +24,7 @@ impl<T: Copy> Iterator for CBufferIter<T> { None } else { self.i += 1; - Some(unsafe { *self.ptr.offset(i as isize) }) + Some(unsafe { *self.ptr.add(i) }) } } @@ -37,6 +38,8 @@ impl<T: Copy> ExactSizeIterator for CBufferIter<T> {} impl<T> Drop for CBufferIter<T> { fn drop(&mut self) { - unsafe { (FREE_FN)(self.ptr as *mut c_void) }; + if !self.ptr.is_null() { + unsafe { (FREE_FN)(self.ptr.cast::<c_void>()) }; + } } } diff --git a/lib/binding_rust/wasm_language.rs b/lib/binding_rust/wasm_language.rs new file mode 100644 index 0000000..2b44dc8 --- /dev/null +++ b/lib/binding_rust/wasm_language.rs @@ -0,0 +1,143 @@ +use std::{ + error, + ffi::{CStr, CString}, + fmt, + mem::{self, MaybeUninit}, + os::raw::c_char, +}; + +pub use wasmtime_c_api::wasmtime; + +use crate::{ffi, Language, LanguageError, Parser, FREE_FN}; + +// Force Cargo to include wasmtime-c-api as a dependency of this crate, +// even though it is only used by the C code. +#[allow(unused)] +fn _use_wasmtime() { + wasmtime_c_api::wasm_engine_new(); +} + +#[repr(C)] +#[derive(Clone)] +#[allow(non_camel_case_types)] +pub struct wasm_engine_t { + pub(crate) engine: wasmtime::Engine, +} + +pub struct WasmStore(*mut ffi::TSWasmStore); + +#[derive(Debug, PartialEq, Eq)] +pub struct WasmError { + pub kind: WasmErrorKind, + pub message: String, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum WasmErrorKind { + Parse, + Compile, + Instantiate, + Other, +} + +impl WasmStore { + pub fn new(engine: wasmtime::Engine) -> Result<Self, WasmError> { + unsafe { + let mut error = MaybeUninit::<ffi::TSWasmError>::uninit(); + let engine = Box::new(wasm_engine_t { engine }); + let store = ffi::ts_wasm_store_new( + (Box::leak(engine) as *mut wasm_engine_t).cast(), + error.as_mut_ptr(), + ); + if store.is_null() { + Err(WasmError::new(error.assume_init())) + } else { + Ok(Self(store)) + } + } + } + + pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Result<Language, WasmError> { + let name = CString::new(name).unwrap(); + unsafe { + let mut error = MaybeUninit::<ffi::TSWasmError>::uninit(); + let language = ffi::ts_wasm_store_load_language( + self.0, + name.as_ptr(), + bytes.as_ptr().cast::<c_char>(), + bytes.len() as u32, + error.as_mut_ptr(), + ); + if language.is_null() { + Err(WasmError::new(error.assume_init())) + } else { + Ok(Language(language)) + } + } + } + + #[must_use] + pub fn language_count(&self) -> usize { + unsafe { ffi::ts_wasm_store_language_count(self.0) } + } +} + +impl WasmError { + unsafe fn new(error: ffi::TSWasmError) -> Self { + let message = CStr::from_ptr(error.message).to_str().unwrap().to_string(); + (FREE_FN)(error.message.cast()); + Self { + kind: match error.kind { + ffi::TSWasmErrorKindParse => WasmErrorKind::Parse, + ffi::TSWasmErrorKindCompile => WasmErrorKind::Compile, + ffi::TSWasmErrorKindInstantiate => WasmErrorKind::Instantiate, + _ => WasmErrorKind::Other, + }, + message, + } + } +} + +impl Language { + #[must_use] + pub fn is_wasm(&self) -> bool { + unsafe { ffi::ts_language_is_wasm(self.0) } + } } + +impl Parser { + pub fn set_wasm_store(&mut self, store: WasmStore) -> Result<(), LanguageError> { + unsafe { ffi::ts_parser_set_wasm_store(self.0.as_ptr(), store.0) }; + mem::forget(store); + Ok(()) + } + + pub fn take_wasm_store(&mut self) -> Option<WasmStore> { + let ptr = unsafe { ffi::ts_parser_take_wasm_store(self.0.as_ptr()) }; + if ptr.is_null() { + None + } else { + Some(WasmStore(ptr)) + } + } +}
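The new `wasm_language.rs` module ties `TSWasmStore` to a wasmtime engine. A hedged usage sketch; it assumes the crate is built with its wasm feature enabled and that a grammar has already been compiled to WebAssembly (the file name below is hypothetical):

```rust
use tree_sitter::{wasmtime::Engine, Parser, WasmStore};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A store compiles and instantiates wasm grammars against one engine.
    let engine = Engine::default();
    let mut store = WasmStore::new(engine)?;

    // Hypothetical path to a grammar compiled to wasm.
    let wasm_bytes = std::fs::read("tree-sitter-javascript.wasm")?;
    let language = store.load_language("javascript", &wasm_bytes)?;
    assert!(language.is_wasm());

    // `set_wasm_store` hands the store to the parser (note the `mem::forget`
    // in the impl above); after that the language is set as usual.
    let mut parser = Parser::new();
    parser.set_wasm_store(store)?;
    parser.set_language(&language)?;

    let tree = parser.parse("const x = 1;", None).unwrap();
    println!("{}", tree.root_node().to_sexp());
    Ok(())
}
```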
+ +impl Drop for WasmStore { + fn drop(&mut self) { + unsafe { ffi::ts_wasm_store_delete(self.0) }; + } +} + +impl fmt::Display for WasmError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let kind = match self.kind { + WasmErrorKind::Parse => "Failed to parse wasm", + WasmErrorKind::Compile => "Failed to compile wasm", + WasmErrorKind::Instantiate => "Failed to instantiate wasm module", + WasmErrorKind::Other => "Unknown error", + }; + write!(f, "{kind}: {}", self.message) + } +} + +impl error::Error for WasmError {} diff --git a/lib/binding_web/.eslintrc.js b/lib/binding_web/.eslintrc.js new file mode 100644 index 0000000..38709eb --- /dev/null +++ b/lib/binding_web/.eslintrc.js @@ -0,0 +1,22 @@ +module.exports = { + 'env': { + 'commonjs': true, + 'es2021': true, + }, + 'extends': 'google', + 'overrides': [ + ], + 'parserOptions': { + 'ecmaVersion': 'latest', + 'sourceType': 'module', + }, + 'rules': { + 'indent': ['error', 2, {'SwitchCase': 1}], + 'max-len': [ + 'error', + {'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true, 'ignoreTemplateLiterals': true}, + ], + 'require-jsdoc': 0, + 'new-cap': 0, + }, +}; diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index a75cd9f..ce5def9 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -1,14 +1,18 @@ -Web Tree-sitter -=============== +# Web Tree-sitter + +[![npmjs.com badge]][npmjs.com] + +[npmjs.com]: https://www.npmjs.org/package/web-tree-sitter +[npmjs.com badge]: https://img.shields.io/npm/v/web-tree-sitter.svg?color=%23BF4A4A WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library. ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: +You can download the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: ```html - '; + const htmlTree = parser.parse(sourceCode); + const scriptContentNode = htmlTree.rootNode.child(1).child(1); + assert.equal(scriptContentNode.type, 'raw_text'); + + parser.setLanguage(JavaScript); + assert.deepEqual(parser.getIncludedRanges(), [{ + startIndex: 0, + endIndex: 2147483647, + startPosition: {row: 0, column: 0}, + endPosition: {row: 4294967295, column: 2147483647}, + }]); + const ranges = [{ + startIndex: scriptContentNode.startIndex, + endIndex: scriptContentNode.endIndex, + startPosition: scriptContentNode.startPosition, + endPosition: scriptContentNode.endPosition, + }]; + const jsTree = parser.parse( + sourceCode, + null, + {includedRanges: ranges}, + ); + assert.deepEqual(parser.getIncludedRanges(), ranges); + + assert.equal( + jsTree.rootNode.toString(), + '(program (expression_statement (call_expression ' + + 'function: (member_expression object: (identifier) property: (property_identifier)) ' + + 'arguments: (arguments (string (string_fragment))))))', ); + assert.deepEqual(jsTree.rootNode.startPosition, {row: 0, column: sourceCode.indexOf('console')}); }); });
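The test above drives `includedRanges` through the web API; the Rust binding exposes the same machinery as `Parser::set_included_ranges`. A rough equivalent of the script-injection case, assuming hypothetical `tree_sitter_html` and `tree_sitter_javascript` grammar crates:

```rust
use tree_sitter::{Parser, Range};

fn main() {
    let mut parser = Parser::new();
    parser.set_language(&tree_sitter_html::language()).unwrap();

    let source = "<div><script>console.log('hi');</script></div>";
    let html_tree = parser.parse(source, None).unwrap();

    // Locate the raw_text node holding the script's contents.
    let offset = source.find("console").unwrap();
    let script_text = html_tree
        .root_node()
        .descendant_for_byte_range(offset, offset)
        .unwrap();

    // Restrict the parser to that slice and re-parse the same buffer as JS.
    parser.set_language(&tree_sitter_javascript::language()).unwrap();
    parser
        .set_included_ranges(&[Range {
            start_byte: script_text.start_byte(),
            end_byte: script_text.end_byte(),
            start_point: script_text.start_position(),
            end_point: script_text.end_position(),
        }])
        .unwrap();
    let js_tree = parser.parse(source, None).unwrap();
    println!("{}", js_tree.root_node().to_sexp());
}
```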
- describe(".parse", () => { + describe('multiple included ranges', () => { + it('parses the text within multiple ranges', () => { + parser.setLanguage(JavaScript); + const sourceCode = 'html `<div>Hello, ${name.toUpperCase()}, it\'s <b>${now()}</b>.</div>`';
+ const jsTree = parser.parse(sourceCode); + const templateStringNode = jsTree.rootNode.descendantForIndex(sourceCode.indexOf('`<'), sourceCode.indexOf('>`')); + assert.equal(templateStringNode.type, 'template_string'); + + const openQuoteNode = templateStringNode.child(0); + const interpolationNode1 = templateStringNode.child(2); + const interpolationNode2 = templateStringNode.child(4); + const closeQuoteNode = templateStringNode.child(6); + + parser.setLanguage(HTML); + const htmlRanges = [ + { + startIndex: openQuoteNode.endIndex, + startPosition: openQuoteNode.endPosition, + endIndex: interpolationNode1.startIndex, + endPosition: interpolationNode1.startPosition, + }, + { + startIndex: interpolationNode1.endIndex, + startPosition: interpolationNode1.endPosition, + endIndex: interpolationNode2.startIndex, + endPosition: interpolationNode2.startPosition, + }, + { + startIndex: interpolationNode2.endIndex, + startPosition: interpolationNode2.endPosition, + endIndex: closeQuoteNode.startIndex, + endPosition: closeQuoteNode.startPosition, + }, + ]; + const htmlTree = parser.parse(sourceCode, null, {includedRanges: htmlRanges}); + + assert.equal( + htmlTree.rootNode.toString(), + '(document (element' + + ' (start_tag (tag_name))' + + ' (text)' + + ' (element (start_tag (tag_name)) (end_tag (tag_name)))' + + ' (text)' + + ' (end_tag (tag_name))))', + ); + assert.deepEqual(htmlTree.getIncludedRanges(), htmlRanges); + + const divElementNode = htmlTree.rootNode.child(0); + const helloTextNode = divElementNode.child(1); + const bElementNode = divElementNode.child(2); + const bStartTagNode = bElementNode.child(0); + const bEndTagNode = bElementNode.child(1); + + assert.equal(helloTextNode.type, 'text'); + assert.equal(helloTextNode.startIndex, sourceCode.indexOf('Hello')); + assert.equal(helloTextNode.endIndex, sourceCode.indexOf(' <b>')); + + assert.equal(bStartTagNode.type, 'start_tag'); + assert.equal(bStartTagNode.startIndex, sourceCode.indexOf('<b>')); + assert.equal(bStartTagNode.endIndex, sourceCode.indexOf('${now()}')); + + assert.equal(bEndTagNode.type, 'end_tag'); + assert.equal(bEndTagNode.startIndex, sourceCode.indexOf('</b>')); + assert.equal(bEndTagNode.endIndex, sourceCode.indexOf('.</div>
')); + }); + }); + + describe('an included range containing mismatched positions', () => { + it('parses the text within the range', () => { + const sourceCode = '
<div>test</div>
{_ignore_this_part_}'; + + parser.setLanguage(HTML); + + const endIndex = sourceCode.indexOf('{_ignore_this_part_'); + + const rangeToParse = { + startIndex: 0, + startPosition: {row: 10, column: 12}, + endIndex, + endPosition: {row: 10, column: 12 + endIndex}, + }; + + const htmlTree = parser.parse(sourceCode, null, {includedRanges: [rangeToParse]}); + + assert.deepEqual(htmlTree.getIncludedRanges()[0], rangeToParse); + + assert.equal( + htmlTree.rootNode.toString(), + '(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))', + ); + }); + }); + + describe('.parse', () => { let tree; beforeEach(() => { tree = null; - parser.setLanguage(JavaScript) + parser.setLanguage(JavaScript); }); afterEach(() => { if (tree) tree.delete(); }); - it("reads from the given input", () => { - const parts = ["first", "_", "second", "_", "third"]; + it('reads from the given input', () => { + const parts = ['first', '_', 'second', '_', 'third']; tree = parser.parse(() => parts.shift()); - assert.equal(tree.rootNode.toString(), "(program (expression_statement (identifier)))"); + assert.equal(tree.rootNode.toString(), '(program (expression_statement (identifier)))'); }); - it("stops reading when the input callback return something that's not a string", () => { - const parts = ["abc", "def", "ghi", {}, {}, {}, "second-word", " "]; + it('stops reading when the input callback return something that\'s not a string', () => { + const parts = ['abc', 'def', 'ghi', {}, {}, {}, 'second-word', ' ']; tree = parser.parse(() => parts.shift()); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (identifier)))" + '(program (expression_statement (identifier)))', ); assert.equal(tree.rootNode.endIndex, 9); assert.equal(parts.length, 2); }); - it("throws an exception when the given input is not a function", () => { - assert.throws(() => parser.parse(null), "Argument must be a string or a function"); - assert.throws(() => parser.parse(5), "Argument must be a string or a function"); - assert.throws(() => parser.parse({}), "Argument must be a string or a function"); + it('throws an exception when the given input is not a function', () => { + assert.throws(() => parser.parse(null), 'Argument must be a string or a function'); + assert.throws(() => parser.parse(5), 'Argument must be a string or a function'); + assert.throws(() => parser.parse({}), 'Argument must be a string or a function'); }); - it("handles long input strings", () => { + it('handles long input strings', () => { const repeatCount = 10000; - const inputString = "[" + "0,".repeat(repeatCount) + "]"; + const inputString = `[${Array(repeatCount).fill('0').join(',')}]`; tree = parser.parse(inputString); - assert.equal(tree.rootNode.type, "program"); + assert.equal(tree.rootNode.type, 'program'); assert.equal(tree.rootNode.firstChild.firstChild.namedChildCount, repeatCount); }).timeout(5000); - it("can use the bash parser", async () => { + it('can use the bash parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('bash'))); - tree = parser.parse("FOO=bar echo <<-EOF < err.txt > hello.txt \nhello\nEOF"); + tree = parser.parse('FOO=bar echo <<-EOF < err.txt > hello.txt \nhello${FOO}\nEOF'); assert.equal( tree.rootNode.toString(), - '(program (redirected_statement ' + - 'body: (command ' + - '(variable_assignment ' + - 'name: (variable_name) ' + - 'value: (word)) ' + - 'name: (command_name (word))) ' + - 'redirect: (heredoc_redirect (heredoc_start)) ' + - 'redirect: (file_redirect descriptor: (file_descriptor) destination:
(word)) ' + - 'redirect: (file_redirect destination: (word))) ' + - '(heredoc_body))' + '(program ' + + '(redirected_statement ' + + 'body: (command ' + + '(variable_assignment name: (variable_name) value: (word)) ' + + 'name: (command_name (word))) ' + + 'redirect: (heredoc_redirect (heredoc_start) ' + + 'redirect: (file_redirect descriptor: (file_descriptor) destination: (word)) ' + + 'redirect: (file_redirect destination: (word)) ' + + '(heredoc_body ' + + '(expansion (variable_name)) (heredoc_content)) (heredoc_end))))', ); }).timeout(5000); - it("can use the c++ parser", async () => { + it('can use the c++ parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('cpp'))); - tree = parser.parse("const char *s = R\"EOF(HELLO WORLD)EOF\";"); + tree = parser.parse('const char *s = R"EOF(HELLO WORLD)EOF";'); assert.equal( tree.rootNode.toString(), '(translation_unit (declaration ' + @@ -153,22 +286,22 @@ describe("Parser", () => { 'type: (primitive_type) ' + 'declarator: (init_declarator ' + 'declarator: (pointer_declarator declarator: (identifier)) ' + - 'value: (raw_string_literal delimiter: (raw_string_delimiter) (raw_string_content) (raw_string_delimiter)))))' + 'value: (raw_string_literal delimiter: (raw_string_delimiter) (raw_string_content) (raw_string_delimiter)))))', ); }).timeout(5000); - it("can use the HTML parser", async () => { + it('can use the HTML parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('html'))); - tree = parser.parse("
"); + tree = parser.parse('
'); assert.equal( tree.rootNode.toString(), - '(fragment (element (start_tag (tag_name)) (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))) (end_tag (tag_name))))' + '(document (element (start_tag (tag_name)) (element (start_tag (tag_name)) (element (start_tag (tag_name)) (end_tag (tag_name))) (end_tag (tag_name))) (end_tag (tag_name))))', ); }).timeout(5000); - it("can use the python parser", async () => { + it('can use the python parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('python'))); - tree = parser.parse("class A:\n def b():\n c()"); + tree = parser.parse('class A:\n def b():\n c()'); assert.equal( tree.rootNode.toString(), '(module (class_definition ' + @@ -179,25 +312,25 @@ describe("Parser", () => { 'parameters: (parameters) ' + 'body: (block (expression_statement (call ' + 'function: (identifier) ' + - 'arguments: (argument_list))))))))' + 'arguments: (argument_list))))))))', ); }).timeout(5000); - it("can use the rust parser", async () => { + it('can use the rust parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('rust'))); - tree = parser.parse("const x: &'static str = r###\"hello\"###;"); + tree = parser.parse('const x: &\'static str = r###"hello"###;'); assert.equal( tree.rootNode.toString(), '(source_file (const_item ' + 'name: (identifier) ' + 'type: (reference_type (lifetime (identifier)) type: (primitive_type)) ' + - 'value: (raw_string_literal)))' + 'value: (raw_string_literal (string_content))))', ); }).timeout(5000); - it("can use the typescript parser", async () => { + it('can use the typescript parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('typescript'))); - tree = parser.parse("a()\nb()\n[c]"); + tree = parser.parse('a()\nb()\n[c]'); assert.equal( tree.rootNode.toString(), '(program ' + @@ -206,13 +339,13 @@ describe("Parser", () => { 'object: (call_expression ' + 'function: (identifier) ' + 'arguments: (arguments)) ' + - 'index: (identifier))))' + 'index: (identifier))))', ); }).timeout(5000); - it("can use the tsx parser", async () => { + it('can use the tsx parser', async () => { parser.setLanguage(await Parser.Language.load(languageURL('tsx'))); - tree = parser.parse("a()\nb()\n[c]"); + tree = parser.parse('a()\nb()\n[c]'); assert.equal( tree.rootNode.toString(), '(program ' + @@ -221,17 +354,17 @@ describe("Parser", () => { 'object: (call_expression ' + 'function: (identifier) ' + 'arguments: (arguments)) ' + - 'index: (identifier))))' + 'index: (identifier))))', ); }).timeout(5000); it('parses only the text within the `includedRanges` if they are specified', () => { - const sourceCode = "<% foo() %> <% bar %>"; + const sourceCode = '<% foo() %> <% bar %>'; const start1 = sourceCode.indexOf('foo'); - const end1 = start1 + 5 + const end1 = start1 + 5; const start2 = sourceCode.indexOf('bar'); - const end2 = start2 + 3 + const end2 = start2 + 3; const tree = parser.parse(sourceCode, null, { includedRanges: [ @@ -239,20 +372,21 @@ describe("Parser", () => { startIndex: start1, endIndex: end1, startPosition: {row: 0, column: start1}, - endPosition: {row: 0, column: end1} + endPosition: {row: 0, column: end1}, }, { startIndex: start2, endIndex: end2, startPosition: {row: 0, column: start2}, - endPosition: {row: 0, column: end2} + endPosition: {row: 0, column: end2}, }, - ] + ], }); assert.equal( tree.rootNode.toString(), - '(program (expression_statement (call_expression function: (identifier) arguments: 
(arguments))) (expression_statement (identifier)))' + '(program (expression_statement (call_expression function: (identifier) arguments: (arguments))) (expression_statement (identifier)))', ); - }) - });}); + }); + }); +}); diff --git a/lib/binding_web/test/query-test.js b/lib/binding_web/test/query-test.js index 2b2aebe..fad6b3c 100644 --- a/lib/binding_web/test/query-test.js +++ b/lib/binding_web/test/query-test.js @@ -1,10 +1,10 @@ -const { assert } = require("chai"); -let Parser, JavaScript; +const {assert} = require('chai'); +let Parser; let JavaScript; -describe("Query", () => { - let parser, tree, query; +describe('Query', () => { + let parser; let tree; let query; - before(async () => ({ Parser, JavaScript } = await require("./helper"))); + before(async () => ({Parser, JavaScript} = await require('./helper'))); beforeEach(() => { parser = new Parser().setLanguage(JavaScript); @@ -16,73 +16,75 @@ describe("Query", () => { if (query) query.delete(); }); - describe("construction", () => { - it("throws an error on invalid patterns", () => { + describe('construction', () => { + it('throws an error on invalid patterns', () => { assert.throws(() => { - JavaScript.query("(function_declaration wat)"); - }, "Bad syntax at offset 22: 'wat)'..."); + JavaScript.query('(function_declaration wat)'); + }, 'Bad syntax at offset 22: \'wat)\'...'); assert.throws(() => { - JavaScript.query("(non_existent)"); - }, "Bad node name 'non_existent'"); + JavaScript.query('(non_existent)'); + }, 'Bad node name \'non_existent\''); assert.throws(() => { - JavaScript.query("(a)"); - }, "Bad node name 'a'"); + JavaScript.query('(a)'); + }, 'Bad node name \'a\''); assert.throws(() => { - JavaScript.query("(function_declaration non_existent:(identifier))"); - }, "Bad field name 'non_existent'"); + JavaScript.query('(function_declaration non_existent:(identifier))'); + }, 'Bad field name \'non_existent\''); assert.throws(() => { - JavaScript.query("(function_declaration name:(statement_block))"); - }, "Bad pattern structure at offset 22: 'name:(statement_block))'"); + JavaScript.query('(function_declaration name:(statement_block))'); + }, 'Bad pattern structure at offset 22: \'name:(statement_block))\''); }); - it("throws an error on invalid predicates", () => { + it('throws an error on invalid predicates', () => { assert.throws(() => { - JavaScript.query("((identifier) @abc (#eq? @ab hi))"); - }, "Bad capture name @ab"); + JavaScript.query('((identifier) @abc (#eq? @ab hi))'); + }, 'Bad capture name @ab'); assert.throws(() => { - JavaScript.query("((identifier) @abc (#eq? @ab hi))"); - }, "Bad capture name @ab"); + JavaScript.query('((identifier) @abc (#eq? @ab hi))'); + }, 'Bad capture name @ab'); assert.throws(() => { - JavaScript.query("((identifier) @abc (#eq?))"); - }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 0"); + JavaScript.query('((identifier) @abc (#eq?))'); + }, 'Wrong number of arguments to `#eq?` predicate. Expected 2, got 0'); assert.throws(() => { - JavaScript.query("((identifier) @a (eq? @a @a @a))"); - }, "Wrong number of arguments to `#eq?` predicate. Expected 2, got 3"); + JavaScript.query('((identifier) @a (#eq? @a @a @a))'); + }, 'Wrong number of arguments to `#eq?` predicate. 
Expected 2, got 3'); }); }); - describe(".matches", () => { - it("returns all of the matches for the given query", () => { - tree = parser.parse("function one() { two(); function three() {} }"); + describe('.matches', () => { + it('returns all of the matches for the given query', () => { + tree = parser.parse('function one() { two(); function three() {} }'); query = JavaScript.query(` (function_declaration name: (identifier) @fn-def) (call_expression function: (identifier) @fn-ref) `); const matches = query.matches(tree.rootNode); assert.deepEqual(formatMatches(matches), [ - { pattern: 0, captures: [{ name: "fn-def", text: "one" }] }, - { pattern: 1, captures: [{ name: "fn-ref", text: "two" }] }, - { pattern: 0, captures: [{ name: "fn-def", text: "three" }] }, + {pattern: 0, captures: [{name: 'fn-def', text: 'one'}]}, + {pattern: 1, captures: [{name: 'fn-ref', text: 'two'}]}, + {pattern: 0, captures: [{name: 'fn-def', text: 'three'}]}, ]); }); - it("can search in a specified ranges", () => { - tree = parser.parse("[a, b,\nc, d,\ne, f,\ng, h]"); - query = JavaScript.query("(identifier) @element"); + it('can search in a specified ranges', () => { + tree = parser.parse('[a, b,\nc, d,\ne, f,\ng, h]'); + query = JavaScript.query('(identifier) @element'); const matches = query.matches( tree.rootNode, - { row: 1, column: 1 }, - { row: 3, column: 1 } + { + startPosition: {row: 1, column: 1}, + endPosition: {row: 3, column: 1}, + }, ); assert.deepEqual(formatMatches(matches), [ - { pattern: 0, captures: [{ name: "element", text: "d" }] }, - { pattern: 0, captures: [{ name: "element", text: "e" }] }, - { pattern: 0, captures: [{ name: "element", text: "f" }] }, - { pattern: 0, captures: [{ name: "element", text: "g" }] }, + {pattern: 0, captures: [{name: 'element', text: 'd'}]}, + {pattern: 0, captures: [{name: 'element', text: 'e'}]}, + {pattern: 0, captures: [{name: 'element', text: 'f'}]}, + {pattern: 0, captures: [{name: 'element', text: 'g'}]}, ]); }); - it("handles predicates that compare the text of capture to literal strings", () => { + it('handles predicates that compare the text of capture to literal strings', () => { tree = parser.parse(` giraffe(1, 2, []); helment([false]); @@ -103,14 +105,31 @@ const matches = query.matches(tree.rootNode); assert.deepEqual(formatMatches(matches), [ - { pattern: 0, captures: [{name: "name", text: "giraffe" }] }, - { pattern: 0, captures: [{name: "name", text: "gross" }] }, + {pattern: 0, captures: [{name: 'name', text: 'giraffe'}]}, + {pattern: 0, captures: [{name: 'name', text: 'gross'}]}, + ]); + }); + + it('handles multiple matches where the first one is filtered', () => { + tree = parser.parse(` + const a = window.b; + `); + + query = JavaScript.query(` + ((identifier) @variable.builtin + (#match? @variable.builtin "^(arguments|module|console|window|document)$") + (#is-not? local)) + `); + + const matches = query.matches(tree.rootNode); + assert.deepEqual(formatMatches(matches), [ + {pattern: 0, captures: [{name: 'variable.builtin', text: 'window'}]}, ]); }); });
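The tests above depend on `#eq?`/`#match?` being checked against capture text. In the Rust binding those built-in text predicates are evaluated while iterating matches, as long as the cursor is handed the source bytes; a sketch (hypothetical grammar crate):

```rust
use tree_sitter::{Parser, Query, QueryCursor};

fn main() {
    let mut parser = Parser::new();
    let language = tree_sitter_javascript::language(); // hypothetical grammar crate
    parser.set_language(&language).unwrap();

    let source = "const a = window.b;";
    let tree = parser.parse(source, None).unwrap();

    let query = Query::new(
        &language,
        r#"((identifier) @variable.builtin
            (#match? @variable.builtin "^(arguments|module|console|window|document)$"))"#,
    )
    .unwrap();

    let mut cursor = QueryCursor::new();
    // Only `window` survives the #match? filter here.
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        for c in m.captures {
            println!("{}", c.node.utf8_text(source.as_bytes()).unwrap());
        }
    }
}
```

Predicates the library does not define, such as `#is-not? local` above, are not evaluated automatically on the Rust side; they are surfaced through `Query::property_predicates` and `Query::general_predicates` for the caller to interpret.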
- describe(".captures", () => { - it("returns all of the captures for the given query, in order", () => { + describe('.captures', () => { + it('returns all of the captures for the given query, in order', () => { tree = parser.parse(` a({ bc: function de() { @@ -124,12 +143,12 @@ query = JavaScript.query(` (pair key: _ @method.def - (function + (function_expression name: (identifier) @method.alias)) (variable_declarator name: _ @function.def - value: (function + value: (function_expression name: (identifier) @function.alias)) ":" @delimiter @@ -138,22 +157,22 @@ const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - { name: "method.def", text: "bc" }, - { name: "delimiter", text: ":" }, - { name: "method.alias", text: "de" }, - { name: "function.def", text: "fg" }, - { name: "operator", text: "=" }, - { name: "function.alias", text: "hi" }, - { name: "method.def", text: "jk" }, - { name: "delimiter", text: ":" }, - { name: "method.alias", text: "lm" }, - { name: "function.def", text: "no" }, - { name: "operator", text: "=" }, - { name: "function.alias", text: "pq" }, + {name: 'method.def', text: 'bc'}, + {name: 'delimiter', text: ':'}, + {name: 'method.alias', text: 'de'}, + {name: 'function.def', text: 'fg'}, + {name: 'operator', text: '='}, + {name: 'function.alias', text: 'hi'}, + {name: 'method.def', text: 'jk'}, + {name: 'delimiter', text: ':'}, + {name: 'method.alias', text: 'lm'}, + {name: 'function.def', text: 'no'}, + {name: 'operator', text: '='}, + {name: 'function.alias', text: 'pq'}, ]); }); - it("handles conditions that compare the text of capture to literal strings", () => { + it('handles conditions that compare the text of capture to literal strings', () => { tree = parser.parse(` lambda panda @@ -179,20 +198,20 @@ const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - { name: "variable", text: "panda" }, - { name: "variable", text: "toad" }, - { name: "variable", text: "ab" }, - { name: "variable", text: "require" }, - { name: "function.builtin", text: "require" }, - { name: "variable", text: "Cd" }, - { name: "constructor", text: "Cd" }, - { name: "variable", text: "EF" }, - { name: "constructor", text: "EF" }, - { name: "constant", text: "EF" }, + {name: 'variable', text: 'panda'}, + {name: 'variable', text: 'toad'}, + {name: 'variable', text: 'ab'}, + {name: 'variable', text: 'require'}, + {name: 'function.builtin', text: 'require'}, + {name: 'variable', text: 'Cd'}, + {name: 'constructor', text: 'Cd'}, + {name: 'variable', text: 'EF'}, + {name: 'constructor', text: 'EF'}, + {name: 'constant', text: 'EF'}, ]); }); - it("handles conditions that compare the text of capture to each other", () => { + it('handles conditions that compare the text of capture to each other', () => { tree = parser.parse(` ab = abc + 1; def = de + 1; @@ -211,12 +230,12 @@ const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - { name: "id1", text: "ghi" }, - { name: "id2", text: "ghi" }, + {name: 'id1', text: 'ghi'}, + {name: 'id2', text: 'ghi'}, ]); }); - it("handles patterns with properties", () => { + it('handles patterns with properties', () => { tree = parser.parse(`a(b.c);`); query =
JavaScript.query(` ((call_expression (identifier) @func) @@ -230,18 +249,18 @@ describe("Query", () => { const captures = query.captures(tree.rootNode); assert.deepEqual(formatCaptures(captures), [ - { name: "func", text: "a", setProperties: { foo: null, bar: "baz" } }, + {name: 'func', text: 'a', setProperties: {foo: null, bar: 'baz'}}, { - name: "prop", - text: "c", - assertedProperties: { foo: null }, - refutedProperties: { bar: "baz" }, + name: 'prop', + text: 'c', + assertedProperties: {foo: null}, + refutedProperties: {bar: 'baz'}, }, ]); assert.ok(!query.didExceedMatchLimit()); }); - it("detects queries with too many permutations to track", () => { + it('detects queries with too many permutations to track', () => { tree = parser.parse(` [ hello, hello, hello, hello, hello, hello, hello, hello, hello, hello, @@ -256,13 +275,96 @@ describe("Query", () => { (array (identifier) @pre (identifier) @post) `); - const captures = query.captures(tree.rootNode, null, null, {matchLimit: 32}); + query.captures(tree.rootNode, {matchLimit: 32}); assert.ok(query.didExceedMatchLimit()); }); + + it('handles quantified captures properly', () => { + let captures; + + tree = parser.parse(` + /// foo + /// bar + /// baz + `); + + query = JavaScript.query(` + ( + (comment)+ @foo + (#any-eq? @foo "/// foo") + ) + `); + + const expectCount = (tree, queryText, expectedCount) => { + query = JavaScript.query(queryText); + captures = query.captures(tree.rootNode); + assert.equal(captures.length, expectedCount); + }; + + expectCount( + tree, + `((comment)+ @foo (#any-eq? @foo "/// foo"))`, + 3, + ); + + expectCount( + tree, + `((comment)+ @foo (#eq? @foo "/// foo"))`, + 0, + ); + + expectCount( + tree, + `((comment)+ @foo (#any-not-eq? @foo "/// foo"))`, + 3, + ); + + expectCount( + tree, + `((comment)+ @foo (#not-eq? @foo "/// foo"))`, + 0, + ); + + expectCount( + tree, + `((comment)+ @foo (#match? @foo "^/// foo"))`, + 0, + ); + + expectCount( + tree, + `((comment)+ @foo (#any-match? @foo "^/// foo"))`, + 3, + ); + + expectCount( + tree, + `((comment)+ @foo (#not-match? @foo "^/// foo"))`, + 0, + ); + + expectCount( + tree, + `((comment)+ @foo (#not-match? @foo "fsdfsdafdfs"))`, + 3, + ); + + expectCount( + tree, + `((comment)+ @foo (#any-not-match? @foo "^///"))`, + 0, + ); + + expectCount( + tree, + `((comment)+ @foo (#any-not-match? 
@foo "^/// foo"))`, + 3, + ); + }); }); - describe(".predicatesForPattern(index)", () => { - it("returns all of the predicates as objects", () => { + describe('.predicatesForPattern(index)', () => { + it('returns all of the predicates as objects', () => { query = JavaScript.query(` ( (binary_expression @@ -281,35 +383,78 @@ describe("Query", () => { assert.deepEqual(query.predicatesForPattern(0), [ { - operator: "something?", + operator: 'something?', operands: [ - { type: "capture", name: "a" }, - { type: "capture", name: "b" }, + {type: 'capture', name: 'a'}, + {type: 'capture', name: 'b'}, ], }, { - operator: "something-else?", + operator: 'something-else?', operands: [ - { type: "capture", name: "a" }, - { type: "string", value: "A" }, - { type: "capture", name: "b" }, - { type: "string", value: "B" }, + {type: 'capture', name: 'a'}, + {type: 'string', value: 'A'}, + {type: 'capture', name: 'b'}, + {type: 'string', value: 'B'}, ], }, ]); assert.deepEqual(query.predicatesForPattern(1), [ { - operator: "hello!", - operands: [{ type: "capture", name: "c" }], + operator: 'hello!', + operands: [{type: 'capture', name: 'c'}], }, ]); assert.deepEqual(query.predicatesForPattern(2), []); }); }); + + describe('.disableCapture', () => { + it('disables a capture', () => { + const query = JavaScript.query(` + (function_declaration + (identifier) @name1 @name2 @name3 + (statement_block) @body1 @body2) + `); + + const source = 'function foo() { return 1; }'; + const tree = parser.parse(source); + + let matches = query.matches(tree.rootNode); + assert.deepEqual(formatMatches(matches), [ + { + pattern: 0, + captures: [ + {name: 'name1', text: 'foo'}, + {name: 'name2', text: 'foo'}, + {name: 'name3', text: 'foo'}, + {name: 'body1', text: '{ return 1; }'}, + {name: 'body2', text: '{ return 1; }'}, + ], + }, + ]); + + // disabling captures still works when there are multiple captures on a + // single node. 
+ query.disableCapture('name2'); + matches = query.matches(tree.rootNode); + assert.deepEqual(formatMatches(matches), [ + { + pattern: 0, + captures: [ + {name: 'name1', text: 'foo'}, + {name: 'name3', text: 'foo'}, + {name: 'body1', text: '{ return 1; }'}, + {name: 'body2', text: '{ return 1; }'}, + ], + }, + ]); + }); + }); }); function formatMatches(matches) { - return matches.map(({ pattern, captures }) => ({ + return matches.map(({pattern, captures}) => ({ pattern, captures: formatCaptures(captures), })); diff --git a/lib/binding_web/test/tree-test.js b/lib/binding_web/test/tree-test.js index 8c04e63..c9216eb 100644 --- a/lib/binding_web/test/tree-test.js +++ b/lib/binding_web/test/tree-test.js @@ -1,11 +1,11 @@ const {assert} = require('chai'); -let Parser, JavaScript; +let Parser; let JavaScript; -describe("Tree", () => { - let parser, tree; +describe('Tree', () => { + let parser; let tree; before(async () => - ({Parser, JavaScript} = await require('./helper')) + ({Parser, JavaScript} = await require('./helper')), ); beforeEach(() => { @@ -18,14 +18,14 @@ describe("Tree", () => { }); describe('.edit', () => { - let input, edit + let input; let edit; it('updates the positions of nodes', () => { input = 'abc + cde'; tree = parser.parse(input); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))', ); let sumNode = tree.rootNode.firstChild.firstChild; @@ -51,17 +51,17 @@ describe("Tree", () => { tree = parser.parse(input, tree); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))" + '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))', ); }); - it("handles non-ascii characters", () => { + it('handles non-ascii characters', () => { input = 'αβδ + cde'; tree = parser.parse(input); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))', ); let variableNode = tree.rootNode.firstChild.firstChild.lastChild; @@ -76,65 +76,65 @@ describe("Tree", () => { tree = parser.parse(input, tree); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))" + '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))', ); }); }); - describe(".getChangedRanges(previous)", () => { - it("reports the ranges of text whose syntactic meaning has changed", () => { - let sourceCode = "abcdefg + hij"; + describe('.getChangedRanges(previous)', () => { + it('reports the ranges of text whose syntactic meaning has changed', () => { + let sourceCode = 'abcdefg + hij'; tree = parser.parse(sourceCode); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))', ); - sourceCode = "abc + defg + hij"; + sourceCode = 'abc + defg + hij'; tree.edit({ startIndex: 2, oldEndIndex: 2, newEndIndex: 5, - 
startPosition: { row: 0, column: 2 }, - oldEndPosition: { row: 0, column: 2 }, - newEndPosition: { row: 0, column: 5 } + startPosition: {row: 0, column: 2}, + oldEndPosition: {row: 0, column: 2}, + newEndPosition: {row: 0, column: 5}, }); const tree2 = parser.parse(sourceCode, tree); assert.equal( tree2.rootNode.toString(), - "(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))" + '(program (expression_statement (binary_expression left: (binary_expression left: (identifier) right: (identifier)) right: (identifier))))', ); const ranges = tree.getChangedRanges(tree2); assert.deepEqual(ranges, [ { startIndex: 0, - endIndex: "abc + defg".length, - startPosition: { row: 0, column: 0 }, - endPosition: { row: 0, column: "abc + defg".length } - } + endIndex: 'abc + defg'.length, + startPosition: {row: 0, column: 0}, + endPosition: {row: 0, column: 'abc + defg'.length}, + }, ]); tree2.delete(); }); it('throws an exception if the argument is not a tree', () => { - tree = parser.parse("abcdefg + hij"); + tree = parser.parse('abcdefg + hij'); assert.throws(() => { tree.getChangedRanges({}); }, /Argument must be a Tree/); - }) + }); });
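`edit` followed by `getChangedRanges` is the incremental-parsing workflow; the Rust names are `Tree::edit`, a re-parse that passes the old tree, then `Tree::changed_ranges`. A sketch mirroring the test above (hypothetical grammar crate):

```rust
use tree_sitter::{InputEdit, Parser, Point};

fn main() {
    let mut parser = Parser::new();
    parser.set_language(&tree_sitter_javascript::language()).unwrap(); // hypothetical

    let mut source = String::from("abcdefg + hij");
    let mut tree = parser.parse(&source, None).unwrap();

    // Insert " + " after "abc" and describe exactly that change.
    source.replace_range(3..3, " + ");
    tree.edit(&InputEdit {
        start_byte: 3,
        old_end_byte: 3,
        new_end_byte: 6,
        start_position: Point::new(0, 3),
        old_end_position: Point::new(0, 3),
        new_end_position: Point::new(0, 6),
    });

    // Passing the edited tree makes the reparse incremental;
    // `changed_ranges` then reports the re-interpreted spans.
    let new_tree = parser.parse(&source, Some(&tree)).unwrap();
    for range in tree.changed_ranges(&new_tree) {
        println!("changed: {}..{}", range.start_byte, range.end_byte);
    }
}
```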
- describe(".walk()", () => { - let cursor + describe('.walk()', () => { + let cursor; afterEach(() => { cursor.delete(); - }) + }); it('returns a cursor that can be used to walk the tree', () => { tree = parser.parse('a * b + c / d'); @@ -146,7 +146,7 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 13}, startIndex: 0, - endIndex: 13 + endIndex: 13, }); assert(cursor.gotoFirstChild()); @@ -156,7 +156,7 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 13}, startIndex: 0, - endIndex: 13 + endIndex: 13, }); assert(cursor.gotoFirstChild()); @@ -166,7 +166,7 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 13}, startIndex: 0, - endIndex: 13 + endIndex: 13, }); assert(cursor.gotoFirstChild()); @@ -176,7 +176,7 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 5}, startIndex: 0, - endIndex: 5 + endIndex: 5, }); assert(cursor.gotoFirstChild()); @@ -187,10 +187,10 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 1}, startIndex: 0, - endIndex: 1 + endIndex: 1, }); - assert(!cursor.gotoFirstChild()) + assert(!cursor.gotoFirstChild()); assert(cursor.gotoNextSibling()); assert.equal(cursor.nodeText, '*'); assertCursorState(cursor, { @@ -199,7 +199,7 @@ startPosition: {row: 0, column: 2}, endPosition: {row: 0, column: 3}, startIndex: 2, - endIndex: 3 + endIndex: 3, }); assert(cursor.gotoNextSibling()); @@ -210,7 +210,7 @@ startPosition: {row: 0, column: 4}, endPosition: {row: 0, column: 5}, startIndex: 4, - endIndex: 5 + endIndex: 5, }); assert(!cursor.gotoNextSibling()); @@ -221,7 +221,7 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 5}, startIndex: 0, - endIndex: 5 + endIndex: 5, }); assert(cursor.gotoNextSibling()); @@ -231,7 +231,7 @@ startPosition: {row: 0, column: 6}, endPosition: {row: 0, column: 7}, startIndex: 6, - endIndex: 7 + endIndex: 7, }); assert(cursor.gotoNextSibling()); @@ -241,28 +241,57 @@ startPosition: {row: 0, column: 8}, endPosition: {row: 0, column: 13}, startIndex: 8, - endIndex: 13 + endIndex: 13, }); - // const childIndex = cursor.gotoFirstChildForIndex(12); - // assertCursorState(cursor, { - // nodeType: 'identifier', - // nodeIsNamed: true, - // startPosition: {row: 0, column: 12}, - // endPosition: {row: 0, column: 13}, - // startIndex: 12, - // endIndex: 13 - // }); - // assert.equal(childIndex, 2); - // assert(!cursor.gotoNextSibling()); - // assert(cursor.gotoParent()); + const copy = tree.walk(); + copy.resetTo(cursor); + + assert(copy.gotoPreviousSibling()); + assertCursorState(copy, { + nodeType: '+', + nodeIsNamed: false, + startPosition: {row: 0, column: 6}, + endPosition: {row: 0, column: 7}, + startIndex: 6, + endIndex: 7, + }); + + assert(copy.gotoPreviousSibling()); + assertCursorState(copy, { + nodeType: 'binary_expression', + nodeIsNamed: true, + startPosition: {row: 0, column: 0}, + endPosition: {row: 0, column: 5}, + startIndex: 0, + endIndex: 5, + }); + + assert(copy.gotoLastChild()); + assertCursorState(copy, { + nodeType: 'identifier', + nodeIsNamed: true, + startPosition: {row: 0, column: 4}, + endPosition: {row: 0, column: 5}, + startIndex: 4, + endIndex: 5, + }); + + assert(copy.gotoParent()); + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'binary_expression'); + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'expression_statement'); + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'program'); + assert(!copy.gotoParent()); assert(cursor.gotoParent()); - assert.equal(cursor.nodeType, 'binary_expression') + assert.equal(cursor.nodeType, 'binary_expression'); assert(cursor.gotoParent()); - assert.equal(cursor.nodeType, 'expression_statement') + assert.equal(cursor.nodeType, 'expression_statement'); assert(cursor.gotoParent()); - assert.equal(cursor.nodeType, 'program') + assert.equal(cursor.nodeType, 'program'); assert(!cursor.gotoParent()); }); @@ -272,26 +301,26 @@ cursor.gotoFirstChild(); cursor.gotoFirstChild(); - assert.equal(cursor.currentNode().type, 'call_expression'); - assert.equal(cursor.currentFieldName(), null); + assert.equal(cursor.currentNode.type, 'call_expression'); + assert.equal(cursor.currentFieldName, null); cursor.gotoFirstChild(); - assert.equal(cursor.currentNode().type, 'member_expression'); - assert.equal(cursor.currentFieldName(), 'function'); + assert.equal(cursor.currentNode.type, 'member_expression'); + assert.equal(cursor.currentFieldName, 'function'); cursor.gotoFirstChild(); - assert.equal(cursor.currentNode().type, 'identifier'); - assert.equal(cursor.currentFieldName(), 'object'); + assert.equal(cursor.currentNode.type, 'identifier'); + assert.equal(cursor.currentFieldName, 'object'); cursor.gotoNextSibling(); cursor.gotoNextSibling(); - assert.equal(cursor.currentNode().type, 'property_identifier'); - assert.equal(cursor.currentFieldName(), 'property'); + assert.equal(cursor.currentNode.type, 'property_identifier'); + assert.equal(cursor.currentFieldName, 'property'); cursor.gotoParent(); cursor.gotoNextSibling(); - assert.equal(cursor.currentNode().type, 'arguments'); - assert.equal(cursor.currentFieldName(), 'arguments'); + assert.equal(cursor.currentNode.type, 'arguments'); + assert.equal(cursor.currentFieldName, 'arguments'); }); it('returns a cursor that can be reset anywhere in the tree', () => { @@ -306,31 +335,31 @@ startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 5}, startIndex: 0, - endIndex: 5 + endIndex: 5, }); - cursor.gotoFirstChild() + cursor.gotoFirstChild(); assertCursorState(cursor, { nodeType: 'identifier',
nodeIsNamed: true, startPosition: {row: 0, column: 0}, endPosition: {row: 0, column: 1}, startIndex: 0, - endIndex: 1 + endIndex: 1, }); assert(cursor.gotoParent()); assert(!cursor.gotoParent()); - }) + }); }); - describe(".copy", () => { - it("creates another tree that remains stable if the original tree is edited", () => { + describe('.copy', () => { + it('creates another tree that remains stable if the original tree is edited', () => { input = 'abc + cde'; tree = parser.parse(input); assert.equal( tree.rootNode.toString(), - "(program (expression_statement (binary_expression left: (identifier) right: (identifier))))" + '(program (expression_statement (binary_expression left: (identifier) right: (identifier))))', ); const tree2 = tree.copy(); @@ -342,10 +371,10 @@ const leftNode2 = tree2.rootNode.firstChild.firstChild.firstChild; const rightNode = tree.rootNode.firstChild.firstChild.lastChild; const rightNode2 = tree2.rootNode.firstChild.firstChild.lastChild; - assert.equal(leftNode.endIndex, 6) - assert.equal(leftNode2.endIndex, 3) - assert.equal(rightNode.startIndex, 9) - assert.equal(rightNode2.startIndex, 6) + assert.equal(leftNode.endIndex, 6); + assert.equal(leftNode2.endIndex, 3); + assert.equal(rightNode.startIndex, 9); + assert.equal(rightNode2.startIndex, 6); }); }); }); @@ -362,16 +391,16 @@ function spliceInput(input, startIndex, lengthRemoved, newText) { { startIndex, startPosition, oldEndIndex, oldEndPosition, - newEndIndex, newEndPosition - } + newEndIndex, newEndPosition, + }, ]; } function getExtent(text) { - let row = 0 + let row = 0; let index; - for (index = 0; index != -1; index = text.indexOf('\n', index)) { - index++ + for (index = 0; index !== -1; index = text.indexOf('\n', index)) { + index++; row++; } return {row, column: text.length - index}; } @@ -385,9 +414,9 @@ function assertCursorState(cursor, params) { assert.deepEqual(cursor.startIndex, params.startIndex); assert.deepEqual(cursor.endIndex, params.endIndex); - const node = cursor.currentNode() + const node = cursor.currentNode; assert.equal(node.type, params.nodeType); - assert.equal(node.isNamed(), params.nodeIsNamed); + assert.equal(node.isNamed, params.nodeIsNamed); assert.deepEqual(node.startPosition, params.startPosition); assert.deepEqual(node.endPosition, params.endPosition); assert.deepEqual(node.startIndex, params.startIndex); diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 016af4a..97a4807 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -1,19 +1,20 @@ declare module 'web-tree-sitter' { class Parser { /** - * + * * @param moduleOptions Optional emscripten module-object, see https://emscripten.org/docs/api_reference/module.html */ static init(moduleOptions?: object): Promise<void>; delete(): void; - parse(input: string | Parser.Input, previousTree?: Parser.Tree, options?: Parser.Options): Parser.Tree; + parse(input: string | Parser.Input, oldTree?: Parser.Tree, options?: Parser.Options): Parser.Tree; + getIncludedRanges(): Parser.Range[]; + getTimeoutMicros(): number; + setTimeoutMicros(timeout: number): void; reset(): void; getLanguage(): Parser.Language; - setLanguage(language?: Parser.Language | undefined | null): void; + setLanguage(language?: Parser.Language | null): void; getLogger(): Parser.Logger; - setLogger(logFunc?: Parser.Logger | undefined | null): void; - setTimeoutMicros(value: number): void; - getTimeoutMicros(): number; + setLogger(logFunc?: Parser.Logger | false | null): void; }
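The reworked `Input` type in these typings is a chunk callback; the Rust analogue is `Parser::parse_with`, whose callback hands back the text starting at a byte offset and ends the read at an empty chunk. A sketch (hypothetical grammar crate):

```rust
use tree_sitter::Parser;

fn main() {
    let mut parser = Parser::new();
    parser.set_language(&tree_sitter_javascript::language()).unwrap(); // hypothetical

    let text = "const x = 1;";
    let tree = parser
        .parse_with(
            // Return the remainder of the document from `byte` onward;
            // an empty slice signals end of input.
            &mut |byte, _point| {
                if byte < text.len() {
                    &text.as_bytes()[byte..]
                } else {
                    &[]
                }
            },
            None,
        )
        .unwrap();
    assert_eq!(tree.root_node().kind(), "program");
}
```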
namespace Parser { @@ -27,10 +28,10 @@ }; export type Range = { - startPosition: Point; - endPosition: Point; - startIndex: number; - endIndex: number; + startIndex: number, + endIndex: number, + startPosition: Point, + endPosition: Point }; export type Edit = { @@ -48,17 +49,26 @@ type: "parse" | "lex" ) => void; - export type Input = ( - startIndex: number, - startPoint?: Point, - endIndex?: number, - ) => string | null; + export interface Input { + (index: number, position?: Point): string | null; + } export interface SyntaxNode { - id: number; tree: Tree; + id: number; + typeId: number; + grammarId: number; type: string; + grammarType: string; + isNamed: boolean; + isMissing: boolean; + isExtra: boolean; + hasChanges: boolean; + hasError: boolean; + isError: boolean; text: string; + parseState: number; + nextParseState: number; startPosition: Point; endPosition: Point; startIndex: number; @@ -76,27 +86,29 @@ nextNamedSibling: SyntaxNode | null; previousSibling: SyntaxNode | null; previousNamedSibling: SyntaxNode | null; + descendantCount: number; - hasChanges(): boolean; - hasError(): boolean; equals(other: SyntaxNode): boolean; - isMissing(): boolean; - isNamed(): boolean; toString(): string; child(index: number): SyntaxNode | null; namedChild(index: number): SyntaxNode | null; - childForFieldId(fieldId: number): SyntaxNode | null; childForFieldName(fieldName: string): SyntaxNode | null; + childForFieldId(fieldId: number): SyntaxNode | null; + fieldNameForChild(childIndex: number): string | null; + childrenForFieldName(fieldName: string): Array<SyntaxNode>; + childrenForFieldId(fieldId: number): Array<SyntaxNode>; + firstChildForIndex(index: number): SyntaxNode | null; + firstNamedChildForIndex(index: number): SyntaxNode | null; descendantForIndex(index: number): SyntaxNode; descendantForIndex(startIndex: number, endIndex: number): SyntaxNode; - descendantsOfType(type: string | Array<string>, startPosition?: Point, endPosition?: Point): Array<SyntaxNode>; namedDescendantForIndex(index: number): SyntaxNode; namedDescendantForIndex(startIndex: number, endIndex: number): SyntaxNode; descendantForPosition(position: Point): SyntaxNode; descendantForPosition(startPosition: Point, endPosition: Point): SyntaxNode; namedDescendantForPosition(position: Point): SyntaxNode; namedDescendantForPosition(startPosition: Point, endPosition: Point): SyntaxNode; + descendantsOfType(types: String | Array<String>, startPosition?: Point, endPosition?: Point): Array<SyntaxNode>; walk(): TreeCursor; } @@ -104,6 +116,7 @@ export interface TreeCursor { nodeType: string; nodeTypeId: number; + nodeStateId: number; nodeText: string; nodeId: number; nodeIsNamed: boolean; @@ -112,35 +125,94 @@ endPosition: Point; startIndex: number; endIndex: number; + readonly currentNode: SyntaxNode; + readonly currentFieldName: string; + readonly currentFieldId: number; + readonly currentDepth: number; + readonly currentDescendantIndex: number; reset(node: SyntaxNode): void; + resetTo(cursor: TreeCursor): void; delete(): void; - currentNode(): SyntaxNode; - currentFieldId(): number; - currentFieldName(): string; gotoParent(): boolean; gotoFirstChild(): boolean; - gotoFirstChildForIndex(index: number): boolean; + gotoLastChild(): boolean; + gotoFirstChildForIndex(goalIndex: number): boolean; + gotoFirstChildForPosition(goalPosition: Point): boolean; gotoNextSibling(): boolean; + gotoPreviousSibling(): boolean; + gotoDescendant(goalDescendantIndex: number): void; }
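The cursor interface grows `gotoLastChild`, `gotoPreviousSibling`, `gotoDescendant`, and `resetTo`, matching the Rust `TreeCursor`. A sketch of the same walk in Rust (hypothetical grammar crate):

```rust
use tree_sitter::Parser;

fn main() {
    let mut parser = Parser::new();
    parser.set_language(&tree_sitter_javascript::language()).unwrap(); // hypothetical
    let tree = parser.parse("a * b + c / d", None).unwrap();

    let mut cursor = tree.walk();
    cursor.goto_first_child(); // expression_statement
    cursor.goto_first_child(); // binary_expression (a * b + c / d)
    cursor.goto_last_child(); // binary_expression (c / d)
    assert!(cursor.goto_previous_sibling()); // the "+" token

    // A second cursor can be re-pointed at the first one's position...
    let mut copy = tree.walk();
    copy.reset_to(&cursor);
    assert_eq!(copy.node().kind(), "+");

    // ...and `goto_descendant` jumps straight to a pre-order descendant index.
    copy.goto_descendant(0);
    assert_eq!(copy.node().kind(), "program");
}
```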
export interface Tree { readonly rootNode: SyntaxNode; + rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): SyntaxNode; copy(): Tree; delete(): void; - edit(delta: Edit): Tree; + edit(edit: Edit): Tree; walk(): TreeCursor; getChangedRanges(other: Tree): Range[]; + getIncludedRanges(): Range[]; getEditedRange(other: Tree): Range; getLanguage(): Language; } + export interface QueryCapture { + name: string; + text?: string; + node: SyntaxNode; + setProperties?: { [prop: string]: string | null }; + assertedProperties?: { [prop: string]: string | null }; + refutedProperties?: { [prop: string]: string | null }; + } + + export interface QueryMatch { + pattern: number; + captures: QueryCapture[]; + } + + export type QueryOptions = { + startPosition?: Point; + endPosition?: Point; + startIndex?: number; + endIndex?: number; + matchLimit?: number; + maxStartDepth?: number; + }; + + export interface PredicateResult { + operator: string; + operands: { name: string; type: string }[]; + } + + export class Query { + captureNames: string[]; + readonly predicates: { [name: string]: Function }[]; + readonly setProperties: any[]; + readonly assertedProperties: any[]; + readonly refutedProperties: any[]; + readonly matchLimit: number; + + delete(): void; + captures(node: SyntaxNode, options?: QueryOptions): QueryCapture[]; + matches(node: SyntaxNode, options?: QueryOptions): QueryMatch[]; + predicatesForPattern(patternIndex: number): PredicateResult[]; + disableCapture(captureName: string): void; + disablePattern(patternIndex: number): void; + isPatternGuaranteedAtStep(byteOffset: number): boolean; + isPatternRooted(patternIndex: number): boolean; + isPatternNonLocal(patternIndex: number): boolean; + startIndexForPattern(patternIndex: number): number; + didExceedMatchLimit(): boolean; + } + class Language { static load(input: string | Uint8Array): Promise<Language>; readonly version: number; readonly fieldCount: number; + readonly stateCount: number; readonly nodeTypeCount: number; fieldNameForId(fieldId: number): string | null; @@ -149,31 +221,20 @@ nodeTypeForId(typeId: number): string | null; nodeTypeIsNamed(typeId: number): boolean; nodeTypeIsVisible(typeId: number): boolean; + nextState(stateId: number, typeId: number): number; query(source: string): Query; + lookaheadIterator(stateId: number): LookaheadIterable | null; } - interface QueryCapture { - name: string; - node: SyntaxNode; - } - - interface QueryMatch { - pattern: number; - captures: QueryCapture[]; - } - - interface PredicateResult { - operator: string; - operands: { name: string; type: string }[]; - } - - class Query { - captureNames: string[]; + export class LookaheadIterable { + readonly language: Language; + readonly currentTypeId: number; + readonly currentType: string; delete(): void; - matches(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryMatch[]; - captures(node: SyntaxNode, startPosition?: Point, endPosition?: Point): QueryCapture[]; - predicatesForPattern(patternIndex: number): PredicateResult[]; + reset(language: Language, stateId: number): boolean; + resetState(stateId: number): boolean; + [Symbol.iterator](): Iterator<string>; } } diff --git a/lib/compile_flags.txt b/lib/compile_flags.txt index e6043ca..3f08438 100644 --- a/lib/compile_flags.txt +++ b/lib/compile_flags.txt @@ -1,3 +1,4 @@ --std=c99 --Isrc --Iinclude \ No newline at end of file +-std=c11 +-Isrc/wasm +-Iinclude +-D TREE_SITTER_FEATURE_WASM
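`QueryOptions` folds the old positional `startPosition`/`endPosition` arguments and the match limit into one object. On the Rust side the same knobs are setters on `QueryCursor`; a sketch mirroring the range-restricted `.matches` test above (hypothetical grammar crate):

```rust
use tree_sitter::{Parser, Point, Query, QueryCursor};

fn main() {
    let mut parser = Parser::new();
    let language = tree_sitter_javascript::language(); // hypothetical grammar crate
    parser.set_language(&language).unwrap();

    let source = "[a, b,\nc, d,\ne, f,\ng, h]";
    let tree = parser.parse(source, None).unwrap();
    let query = Query::new(&language, "(identifier) @element").unwrap();

    let mut cursor = QueryCursor::new();
    cursor.set_point_range(Point::new(1, 1)..Point::new(3, 1)); // startPosition/endPosition
    cursor.set_match_limit(32); // matchLimit

    // Yields d, e, f, g, as in the JS test above.
    for m in cursor.matches(&query, tree.root_node(), source.as_bytes()) {
        for c in m.captures {
            println!("{}", c.node.utf8_text(source.as_bytes()).unwrap());
        }
    }
    assert!(!cursor.did_exceed_match_limit());
}
```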
a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index edc1c36..deb2364 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -1,11 +1,16 @@ #ifndef TREE_SITTER_API_H_ #define TREE_SITTER_API_H_ +#ifndef TREE_SITTER_HIDE_SYMBOLS +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC visibility push(default) +#endif +#endif + #ifdef __cplusplus extern "C" { #endif -#include <stdio.h> #include <stdbool.h> #include <stdint.h> #include <stdlib.h> @@ -33,6 +38,7 @@ extern "C" { /* Section - Types */ /*******************/ +typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; @@ -40,47 +46,48 @@ typedef struct TSParser TSParser; typedef struct TSTree TSTree; typedef struct TSQuery TSQuery; typedef struct TSQueryCursor TSQueryCursor; +typedef struct TSLookaheadIterator TSLookaheadIterator; -typedef enum { +typedef enum TSInputEncoding { TSInputEncodingUTF8, TSInputEncodingUTF16, } TSInputEncoding; -typedef enum { +typedef enum TSSymbolType { TSSymbolTypeRegular, TSSymbolTypeAnonymous, TSSymbolTypeAuxiliary, } TSSymbolType; -typedef struct { +typedef struct TSPoint { uint32_t row; uint32_t column; } TSPoint; -typedef struct { +typedef struct TSRange { TSPoint start_point; TSPoint end_point; uint32_t start_byte; uint32_t end_byte; } TSRange; -typedef struct { +typedef struct TSInput { void *payload; const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read); TSInputEncoding encoding; } TSInput; -typedef enum { +typedef enum TSLogType { TSLogTypeParse, TSLogTypeLex, } TSLogType; -typedef struct { +typedef struct TSLogger { void *payload; - void (*log)(void *payload, TSLogType, const char *); + void (*log)(void *payload, TSLogType log_type, const char *buffer); } TSLogger; -typedef struct { +typedef struct TSInputEdit { uint32_t start_byte; uint32_t old_end_byte; uint32_t new_end_byte; @@ -89,24 +96,24 @@ typedef struct { TSPoint new_end_point; } TSInputEdit; -typedef struct { +typedef struct TSNode { uint32_t context[4]; const void *id; const TSTree *tree; } TSNode; -typedef struct { +typedef struct TSTreeCursor { const void *tree; const void *id; - uint32_t context[2]; + uint32_t context[3]; } TSTreeCursor; -typedef struct { +typedef struct TSQueryCapture { TSNode node; uint32_t index; } TSQueryCapture; -typedef enum { +typedef enum TSQuantifier { TSQuantifierZero = 0, // must match the array initialization value TSQuantifierZeroOrOne, TSQuantifierZeroOrMore, @@ -114,25 +121,25 @@ typedef enum { TSQuantifierOneOrMore, } TSQuantifier; -typedef struct { +typedef struct TSQueryMatch { uint32_t id; uint16_t pattern_index; uint16_t capture_count; const TSQueryCapture *captures; } TSQueryMatch; -typedef enum { +typedef enum TSQueryPredicateStepType { TSQueryPredicateStepTypeDone, TSQueryPredicateStepTypeCapture, TSQueryPredicateStepTypeString, } TSQueryPredicateStepType; -typedef struct { +typedef struct TSQueryPredicateStep { TSQueryPredicateStepType type; uint32_t value_id; } TSQueryPredicateStep; -typedef enum { +typedef enum TSQueryError { TSQueryErrorNone = 0, TSQueryErrorSyntax, TSQueryErrorNodeType, @@ -154,7 +161,12 @@ TSParser *ts_parser_new(void); /** * Delete the parser, freeing all of the memory that it used. */ -void ts_parser_delete(TSParser *parser); +void ts_parser_delete(TSParser *self); + +/** + * Get the parser's current language. + */ +const TSLanguage *ts_parser_language(const TSParser *self); /** * Set the language that the parser should use for parsing. 
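The declarations above are enough to stand up a parser. A minimal setup sketch, assuming <tree_sitter/api.h> is included and a generated grammar exposes an entry point such as tree_sitter_json() (that name is hypothetical here):

const TSLanguage *tree_sitter_json(void); // hypothetical grammar entry point

bool make_parser(TSParser **out) {
  TSParser *parser = ts_parser_new();
  if (!ts_parser_set_language(parser, tree_sitter_json())) {
    ts_parser_delete(parser); // ABI version mismatch with this library
    return false;
  }
  *out = parser;
  return true;
}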
@@ -162,17 +174,12 @@ void ts_parser_delete(TSParser *parser); * Returns a boolean indicating whether or not the language was successfully * assigned. True means assignment succeeded. False means there was a version * mismatch: the language was generated with an incompatible version of the - * Tree-sitter CLI. Check the language's version using `ts_language_version` - * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and - * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants. + * Tree-sitter CLI. Check the language's version using [`ts_language_version`] + * and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and + * [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants. */ bool ts_parser_set_language(TSParser *self, const TSLanguage *language); -/** - * Get the parser's current language. - */ -const TSLanguage *ts_parser_language(const TSParser *self); - /** * Set the ranges of text that the parser should include when parsing. * @@ -185,10 +192,11 @@ const TSLanguage *ts_parser_language(const TSParser *self); * of ranges. The parser does *not* take ownership of these ranges; it copies * the data, so it doesn't matter how these ranges are allocated. * - * If `length` is zero, then the entire document will be parsed. Otherwise, + * If `count` is zero, then the entire document will be parsed. Otherwise, * the given ranges must be ordered from earliest to latest in the document, - * and they must not overlap. That is, the following must hold for all - * `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte + * and they must not overlap. That is, the following must hold for all: + * + * `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte` * * If this requirement is not satisfied, the operation will fail, the ranges * will not be assigned, and this function will return `false`. On success, @@ -197,7 +205,7 @@ const TSLanguage *ts_parser_language(const TSParser *self); bool ts_parser_set_included_ranges( TSParser *self, const TSRange *ranges, - uint32_t length + uint32_t count ); /** @@ -205,11 +213,11 @@ bool ts_parser_set_included_ranges( * * The returned pointer is owned by the parser. The caller should not free it * or write to it. The length of the array will be written to the given - * `length` pointer. + * `count` pointer. */ const TSRange *ts_parser_included_ranges( const TSParser *self, - uint32_t *length + uint32_t *count ); /** @@ -220,35 +228,40 @@ const TSRange *ts_parser_included_ranges( * version of this document and the document has since been edited, pass the * previous syntax tree so that the unchanged parts of it can be reused. * This will save time and memory. For this to work correctly, you must have - * already edited the old syntax tree using the `ts_tree_edit` function in a + * already edited the old syntax tree using the [`ts_tree_edit`] function in a * way that exactly matches the source code changes. * - * The `TSInput` parameter lets you specify how to read the text. It has the + * The [`TSInput`] parameter lets you specify how to read the text. It has the * following three fields: - * 1. `read`: A function to retrieve a chunk of text at a given byte offset + * 1. [`read`]: A function to retrieve a chunk of text at a given byte offset * and (row, column) position. The function should return a pointer to the - * text and write its length to the `bytes_read` pointer. The parser does + * text and write its length to the [`bytes_read`] pointer. 
The parser does * not take ownership of this buffer; it just borrows it until it has * finished reading it. The function should write a zero value to the - * `bytes_read` pointer to indicate the end of the document. - * 2. `payload`: An arbitrary pointer that will be passed to each invocation - * of the `read` function. - * 3. `encoding`: An indication of how the text is encoded. Either + * [`bytes_read`] pointer to indicate the end of the document. + * 2. [`payload`]: An arbitrary pointer that will be passed to each invocation + * of the [`read`] function. + * 3. [`encoding`]: An indication of how the text is encoded. Either * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. * * This function returns a syntax tree on success, and `NULL` on failure. There * are three possible reasons for failure: * 1. The parser does not have a language assigned. Check for this using the - `ts_parser_language` function. + [`ts_parser_language`] function. * 2. Parsing was cancelled due to a timeout that was set by an earlier call to - * the `ts_parser_set_timeout_micros` function. You can resume parsing from - * where the parser left out by calling `ts_parser_parse` again with the + * the [`ts_parser_set_timeout_micros`] function. You can resume parsing from + * where the parser left off by calling [`ts_parser_parse`] again with the * same arguments. Or you can start parsing from scratch by first calling - * `ts_parser_reset`. + * [`ts_parser_reset`]. * 3. Parsing was cancelled using a cancellation flag that was set by an - * earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing - * from where the parser left out by calling `ts_parser_parse` again with + * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing + * from where the parser left off by calling [`ts_parser_parse`] again with * the same arguments. + * + * [`read`]: TSInput::read + * [`payload`]: TSInput::payload + * [`encoding`]: TSInput::encoding + * [`bytes_read`]: TSInput::read */ TSTree *ts_parser_parse( TSParser *self, @@ -258,7 +271,7 @@ TSTree *ts_parser_parse( /** * Use the parser to parse some source code stored in one contiguous buffer. - * The first two parameters are the same as in the `ts_parser_parse` function + * The first two parameters are the same as in the [`ts_parser_parse`] function * above. The second two parameters indicate the location of the buffer and its * length in bytes. */ @@ -272,7 +285,7 @@ TSTree *ts_parser_parse_string( /** * Use the parser to parse some source code stored in one contiguous buffer with * a given encoding. The first four parameters work the same as in the - * `ts_parser_parse_string` method above. The final parameter indicates whether + * [`ts_parser_parse_string`] method above. The final parameter indicates whether * the text is encoded as UTF8 or UTF16. */ TSTree *ts_parser_parse_string_encoding( @@ -288,9 +301,9 @@ TSTree *ts_parser_parse_string_encoding( * * If the parser previously failed because of a timeout or a cancellation, then * by default, it will resume where it left off on the next call to - * `ts_parser_parse` or other parsing functions. If you don't want to resume, + * [`ts_parser_parse`] or other parsing functions. If you don't want to resume, * and instead intend to use this parser to parse some other document, you must - * call `ts_parser_reset` first. + * call [`ts_parser_reset`] first. */ void ts_parser_reset(TSParser *self); @@ -299,9 +312,9 @@ void ts_parser_reset(TSParser *self); * take before halting. 
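As a concrete reading of the TSInput contract described above, a minimal sketch of a read callback that serves one contiguous buffer (the StringInput type and all names here are illustrative, not part of the API):

typedef struct { const char *text; uint32_t length; } StringInput;

static const char *read_chunk(void *payload, uint32_t byte_index,
                              TSPoint position, uint32_t *bytes_read) {
  StringInput *input = payload;
  (void)position; // byte offsets suffice for a flat buffer
  if (byte_index >= input->length) {
    *bytes_read = 0; // zero signals end of document, per the doc above
    return "";
  }
  *bytes_read = input->length - byte_index;
  return input->text + byte_index;
}

// Usage sketch: TSInput input = {&string_input, read_chunk, TSInputEncodingUTF8};
//               TSTree *tree = ts_parser_parse(parser, NULL, input);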
* * If parsing takes longer than this, it will halt early, returning NULL. - * See `ts_parser_parse` for more information. + * See [`ts_parser_parse`] for more information. */ -void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout); +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); /** * Get the duration in microseconds that parsing is allowed to take. @@ -313,7 +326,7 @@ uint64_t ts_parser_timeout_micros(const TSParser *self); * * If a non-null pointer is assigned, then the parser will periodically read * from this pointer during parsing. If it reads a non-zero value, it will - * halt early, returning NULL. See `ts_parser_parse` for more information. + * halt early, returning NULL. See [`ts_parser_parse`] for more information. */ void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); @@ -342,7 +355,7 @@ TSLogger ts_parser_logger(const TSParser *self); * to pipe these graphs directly to a `dot(1)` process in order to generate * SVG output. You can turn off this logging by passing a negative number. */ -void ts_parser_print_dot_graphs(TSParser *self, int file); +void ts_parser_print_dot_graphs(TSParser *self, int fd); /******************/ /* Section - Tree */ /******************/ @@ -373,20 +386,20 @@ TSNode ts_tree_root_node(const TSTree *self); TSNode ts_tree_root_node_with_offset( const TSTree *self, uint32_t offset_bytes, - TSPoint offset_point + TSPoint offset_extent ); /** * Get the language that was used to parse the syntax tree. */ -const TSLanguage *ts_tree_language(const TSTree *); +const TSLanguage *ts_tree_language(const TSTree *self); /** * Get the array of included ranges that was used to parse the syntax tree. * * The returned pointer must be freed by the caller. */ -TSRange *ts_tree_included_ranges(const TSTree *, uint32_t *length); +TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length); /** * Edit the syntax tree to keep it in sync with source code that has been @@ -403,7 +416,7 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit); * * For this to work correctly, the old syntax tree must have been edited such * that its ranges match up to the new tree. Generally, you'll want to call - * this function right after calling one of the `ts_parser_parse` functions. + * this function right after calling one of the [`ts_parser_parse`] functions. * You need to pass the old tree that was passed to parse, as well as the new * tree that was returned from that function. * @@ -420,7 +433,7 @@ TSRange *ts_tree_get_changed_ranges( /** * Write a DOT graph describing the syntax tree to the given file. */ -void ts_tree_print_dot_graph(const TSTree *, int file_descriptor); +void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); /******************/ /* Section - Node */ /******************/ @@ -429,32 +442,50 @@ void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor); /** * Get the node's type as a null-terminated string. */ -const char *ts_node_type(TSNode); +const char *ts_node_type(TSNode self); /** * Get the node's type as a numerical id. */ -TSSymbol ts_node_symbol(TSNode); +TSSymbol ts_node_symbol(TSNode self); + +/** + * Get the node's language. + */ +const TSLanguage *ts_node_language(TSNode self); + +/** + * Get the node's type, as it appears in the grammar ignoring aliases, as a + * null-terminated string. + */ +const char *ts_node_grammar_type(TSNode self); + +/** + * Get the node's type as a numerical id, as it appears in the grammar ignoring + * aliases. 
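To tie ts_tree_edit and ts_tree_get_changed_ranges together, a sketch of the edit-then-reparse cycle; the edit coordinates are invented, and a real editor would derive them from the actual text change (assumes <stdlib.h> for free):

TSTree *reparse_after_edit(TSParser *parser, TSTree *old_tree,
                           const char *new_source, uint32_t new_length) {
  // Example only: 5 bytes at the start of line 0 were replaced by 8 bytes.
  TSInputEdit edit = {
    .start_byte = 0, .old_end_byte = 5, .new_end_byte = 8,
    .start_point = {0, 0}, .old_end_point = {0, 5}, .new_end_point = {0, 8},
  };
  ts_tree_edit(old_tree, &edit);
  TSTree *new_tree = ts_parser_parse_string(parser, old_tree, new_source, new_length);
  uint32_t count;
  TSRange *changed = ts_tree_get_changed_ranges(old_tree, new_tree, &count);
  // ... re-process only the ranges in changed[0..count) ...
  free(changed); // the returned array is caller-owned
  ts_tree_delete(old_tree);
  return new_tree;
}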
This should be used in [`ts_language_next_state`] instead of + * [`ts_node_symbol`]. + */ +TSSymbol ts_node_grammar_symbol(TSNode self); /** * Get the node's start byte. */ -uint32_t ts_node_start_byte(TSNode); +uint32_t ts_node_start_byte(TSNode self); /** * Get the node's start position in terms of rows and columns. */ -TSPoint ts_node_start_point(TSNode); +TSPoint ts_node_start_point(TSNode self); /** * Get the node's end byte. */ -uint32_t ts_node_end_byte(TSNode); +uint32_t ts_node_end_byte(TSNode self); /** * Get the node's end position in terms of rows and columns. */ -TSPoint ts_node_end_point(TSNode); +TSPoint ts_node_end_point(TSNode self); /** * Get an S-expression representing the node as a string. @@ -462,148 +493,175 @@ TSPoint ts_node_end_point(TSNode); * This string is allocated with `malloc` and the caller is responsible for * freeing it using `free`. */ -char *ts_node_string(TSNode); +char *ts_node_string(TSNode self); /** - * Check if the node is null. Functions like `ts_node_child` and - * `ts_node_next_sibling` will return a null node to indicate that no such node + * Check if the node is null. Functions like [`ts_node_child`] and + * [`ts_node_next_sibling`] will return a null node to indicate that no such node * was found. */ -bool ts_node_is_null(TSNode); +bool ts_node_is_null(TSNode self); /** * Check if the node is *named*. Named nodes correspond to named rules in the * grammar, whereas *anonymous* nodes correspond to string literals in the * grammar. */ -bool ts_node_is_named(TSNode); +bool ts_node_is_named(TSNode self); /** * Check if the node is *missing*. Missing nodes are inserted by the parser in * order to recover from certain kinds of syntax errors. */ -bool ts_node_is_missing(TSNode); +bool ts_node_is_missing(TSNode self); /** * Check if the node is *extra*. Extra nodes represent things like comments, * which are not required by the grammar, but can appear anywhere. */ -bool ts_node_is_extra(TSNode); +bool ts_node_is_extra(TSNode self); /** * Check if a syntax node has been edited. */ -bool ts_node_has_changes(TSNode); +bool ts_node_has_changes(TSNode self); /** * Check if the node is a syntax error or contains any syntax errors. */ -bool ts_node_has_error(TSNode); +bool ts_node_has_error(TSNode self); + +/** + * Check if the node is a syntax error. +*/ +bool ts_node_is_error(TSNode self); + +/** + * Get this node's parse state. +*/ +TSStateId ts_node_parse_state(TSNode self); + +/** + * Get the parse state after this node. +*/ +TSStateId ts_node_next_parse_state(TSNode self); /** * Get the node's immediate parent. + * Prefer [`ts_node_child_containing_descendant`] for + * iterating over the node's ancestors. */ -TSNode ts_node_parent(TSNode); +TSNode ts_node_parent(TSNode self); + +/** + * Get the node's child that contains `descendant`. + */ +TSNode ts_node_child_containing_descendant(TSNode self, TSNode descendant); /** * Get the node's child at the given index, where zero represents the first * child. */ -TSNode ts_node_child(TSNode, uint32_t); +TSNode ts_node_child(TSNode self, uint32_t child_index); /** * Get the field name for the node's child at the given index, where zero represents * the first child. Returns NULL if no field is found. */ -const char *ts_node_field_name_for_child(TSNode, uint32_t); +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); /** * Get the node's number of children. 
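A short sketch of the node inspectors above; `tree` is assumed to come from a successful parse, and <stdio.h>/<stdlib.h> are assumed for output and free:

void print_node_info(const TSTree *tree) {
  TSNode root = ts_tree_root_node(tree);
  printf("type: %s, named: %d\n", ts_node_type(root), ts_node_is_named(root));
  char *sexp = ts_node_string(root); // malloc'd, per the doc above
  printf("%s\n", sexp);
  free(sexp);
}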
*/ -uint32_t ts_node_child_count(TSNode); +uint32_t ts_node_child_count(TSNode self); /** * Get the node's *named* child at the given index. * - * See also `ts_node_is_named`. + * See also [`ts_node_is_named`]. */ -TSNode ts_node_named_child(TSNode, uint32_t); +TSNode ts_node_named_child(TSNode self, uint32_t child_index); /** * Get the node's number of *named* children. * - * See also `ts_node_is_named`. + * See also [`ts_node_is_named`]. */ -uint32_t ts_node_named_child_count(TSNode); +uint32_t ts_node_named_child_count(TSNode self); /** * Get the node's child with the given field name. */ TSNode ts_node_child_by_field_name( TSNode self, - const char *field_name, - uint32_t field_name_length + const char *name, + uint32_t name_length ); /** * Get the node's child with the given numerical field id. * * You can convert a field name to an id using the - * `ts_language_field_id_for_name` function. + * [`ts_language_field_id_for_name`] function. */ -TSNode ts_node_child_by_field_id(TSNode, TSFieldId); +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id); /** * Get the node's next / previous sibling. */ -TSNode ts_node_next_sibling(TSNode); -TSNode ts_node_prev_sibling(TSNode); +TSNode ts_node_next_sibling(TSNode self); +TSNode ts_node_prev_sibling(TSNode self); /** * Get the node's next / previous *named* sibling. */ -TSNode ts_node_next_named_sibling(TSNode); -TSNode ts_node_prev_named_sibling(TSNode); +TSNode ts_node_next_named_sibling(TSNode self); +TSNode ts_node_prev_named_sibling(TSNode self); /** * Get the node's first child that extends beyond the given byte offset. */ -TSNode ts_node_first_child_for_byte(TSNode, uint32_t); +TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte); /** * Get the node's first named child that extends beyond the given byte offset. */ -TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t); +TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte); + +/** + * Get the node's number of descendants, including one for the node itself. + */ +uint32_t ts_node_descendant_count(TSNode self); /** * Get the smallest node within this node that spans the given range of bytes * or (row, column) positions. */ -TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t); -TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint); +TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); +TSNode ts_node_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); /** * Get the smallest named node within this node that spans the given range of * bytes or (row, column) positions. */ -TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t); -TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint); +TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end); +TSNode ts_node_named_descendant_for_point_range(TSNode self, TSPoint start, TSPoint end); /** * Edit the node to keep it in-sync with source code that has been edited. * * This function is only rarely needed. When you edit a syntax tree with the - * `ts_tree_edit` function, all of the nodes that you retrieve from the tree - * afterward will already reflect the edit. You only need to use `ts_node_edit` - * when you have a `TSNode` instance that you want to keep and continue to use + * [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree + * afterward will already reflect the edit. 
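For the field accessors above, a sketch of looking up a child by field name; the node and the field name "name" are grammar-specific placeholders (length 4 is strlen("name")):

void print_field_span(TSNode fn_node) {
  TSNode name_node = ts_node_child_by_field_name(fn_node, "name", 4);
  if (!ts_node_is_null(name_node)) {
    // the field's text is source[start..end) in the original buffer
    printf("field spans bytes %u..%u\n",
           ts_node_start_byte(name_node), ts_node_end_byte(name_node));
  }
}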
You only need to use [`ts_node_edit`] + * when you have a [`TSNode`] instance that you want to keep and continue to use * after an edit. */ -void ts_node_edit(TSNode *, const TSInputEdit *); +void ts_node_edit(TSNode *self, const TSInputEdit *edit); /** * Check if two nodes are identical. */ -bool ts_node_eq(TSNode, TSNode); +bool ts_node_eq(TSNode self, TSNode other); /************************/ /* Section - TreeCursor */ @@ -613,41 +671,49 @@ bool ts_node_eq(TSNode, TSNode); * Create a new tree cursor starting from the given node. * * A tree cursor allows you to walk a syntax tree more efficiently than is - * possible using the `TSNode` functions. It is a mutable object that is always + * possible using the [`TSNode`] functions. It is a mutable object that is always * on a certain syntax node, and can be moved imperatively to different nodes. */ -TSTreeCursor ts_tree_cursor_new(TSNode); +TSTreeCursor ts_tree_cursor_new(TSNode node); /** * Delete a tree cursor, freeing all of the memory that it used. */ -void ts_tree_cursor_delete(TSTreeCursor *); +void ts_tree_cursor_delete(TSTreeCursor *self); /** * Re-initialize a tree cursor to start at a different node. */ -void ts_tree_cursor_reset(TSTreeCursor *, TSNode); +void ts_tree_cursor_reset(TSTreeCursor *self, TSNode node); + +/** + * Re-initialize a tree cursor to the same position as another cursor. + * + * Unlike [`ts_tree_cursor_reset`], this will not lose parent information and + * allows reusing already created cursors. +*/ +void ts_tree_cursor_reset_to(TSTreeCursor *dst, const TSTreeCursor *src); /** * Get the tree cursor's current node. */ -TSNode ts_tree_cursor_current_node(const TSTreeCursor *); +TSNode ts_tree_cursor_current_node(const TSTreeCursor *self); /** * Get the field name of the tree cursor's current node. * * This returns `NULL` if the current node doesn't have a field. - * See also `ts_node_child_by_field_name`. + * See also [`ts_node_child_by_field_name`]. */ -const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *self); /** * Get the field id of the tree cursor's current node. * * This returns zero if the current node doesn't have a field. - * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`. + * See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]. */ -TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *self); /** * Move the cursor to the parent of its current node. @@ -655,7 +721,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); * This returns `true` if the cursor successfully moved, and returns `false` * if there was no parent node (the cursor was already on the root node). */ -bool ts_tree_cursor_goto_parent(TSTreeCursor *); +bool ts_tree_cursor_goto_parent(TSTreeCursor *self); /** * Move the cursor to the next sibling of its current node. @@ -663,7 +729,20 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *); * This returns `true` if the cursor successfully moved, and returns `false` * if there was no next sibling node. */ -bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); +bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self); + +/** + * Move the cursor to the previous sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there was no previous sibling node. 
+ * + * Note that this function may be slower than + * [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. In + * the worst case, this will need to iterate through all the children up to the + * previous sibling node to recalculate its position. + */ +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self); /** * Move the cursor to the first child of its current node. @@ -671,7 +750,38 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); * This returns `true` if the cursor successfully moved, and returns `false` * if there were no children. */ -bool ts_tree_cursor_goto_first_child(TSTreeCursor *); +bool ts_tree_cursor_goto_first_child(TSTreeCursor *self); + +/** + * Move the cursor to the last child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there were no children. + * + * Note that this function may be slower than [`ts_tree_cursor_goto_first_child`] + * because it needs to iterate through all the children to compute the child's + * position. + */ +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self); + +/** + * Move the cursor to the node that is the nth descendant of + * the original node that the cursor was constructed with, where + * zero represents the original node itself. + */ +void ts_tree_cursor_goto_descendant(TSTreeCursor *self, uint32_t goal_descendant_index); + +/** + * Get the index of the cursor's current node out of all of the + * descendants of the original node that the cursor was constructed with. + */ +uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *self); + +/** + * Get the depth of the cursor's current node relative to the original + * node that the cursor was constructed with. + */ +uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *self); /** * Move the cursor to the first child of its current node that extends beyond @@ -680,10 +790,10 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *); * This returns the index of the child node if one was found, and returns -1 * if no such child was found. */ -int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); -int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *, TSPoint); +int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte); +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point); -TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *); +TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *cursor); /*******************/ /* Section - Query */ /*******************/ @@ -694,7 +804,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *); * patterns. The query is associated with a particular language, and can * only be run on syntax nodes parsed with that language. * - * If all of the given patterns are valid, this returns a `TSQuery`. + * If all of the given patterns are valid, this returns a [`TSQuery`]. * If a pattern is invalid, this returns `NULL`, and provides two pieces * of information about the problem: * 1. The byte offset of the error is written to the `error_offset` parameter. @@ -711,14 +821,14 @@ TSQuery *ts_query_new( /** * Delete a query, freeing all of the memory that it used. */ -void ts_query_delete(TSQuery *); +void ts_query_delete(TSQuery *self); /** * Get the number of patterns, captures, or string literals in the query. 
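The cursor movements above compose into a full pre-order traversal. A minimal sketch that visits every node without recursion, assuming `root` is a valid node:

void visit_all_nodes(TSNode root) {
  TSTreeCursor cursor = ts_tree_cursor_new(root);
  bool done = false;
  while (!done) {
    TSNode node = ts_tree_cursor_current_node(&cursor);
    (void)node; // ... visit `node` here ...
    if (ts_tree_cursor_goto_first_child(&cursor)) continue;
    while (!ts_tree_cursor_goto_next_sibling(&cursor)) {
      if (!ts_tree_cursor_goto_parent(&cursor)) { done = true; break; }
    }
  }
  ts_tree_cursor_delete(&cursor);
}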
*/ -uint32_t ts_query_pattern_count(const TSQuery *); -uint32_t ts_query_capture_count(const TSQuery *); -uint32_t ts_query_string_count(const TSQuery *); +uint32_t ts_query_pattern_count(const TSQuery *self); +uint32_t ts_query_capture_count(const TSQuery *self); +uint32_t ts_query_string_count(const TSQuery *self); /** * Get the byte offset where the given pattern starts in the query's source. @@ -726,7 +836,7 @@ uint32_t ts_query_string_count(const TSQuery *); * This can be useful when combining queries by concatenating their source * code strings. */ -uint32_t ts_query_start_byte_for_pattern(const TSQuery *, uint32_t); +uint32_t ts_query_start_byte_for_pattern(const TSQuery *self, uint32_t pattern_index); /** * Get all of the predicates for the given pattern in the query. @@ -736,10 +846,10 @@ uint32_t ts_query_start_byte_for_pattern(const TSQuery *, uint32_t); * the `type` field: * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names * of captures. Their `value_id` can be used with the - * `ts_query_capture_name_for_id` function to obtain the name of the capture. + * [`ts_query_capture_name_for_id`] function to obtain the name of the capture. * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal * strings. Their `value_id` can be used with the - * `ts_query_string_value_for_id` function to obtain their string value. + * [`ts_query_string_value_for_id`] function to obtain their string value. * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels* * that represent the end of an individual predicate. If a pattern has two * predicates, then there will be two steps with this `type` in the array. @@ -747,7 +857,7 @@ uint32_t ts_query_start_byte_for_pattern(const TSQuery *, uint32_t); const TSQueryPredicateStep *ts_query_predicates_for_pattern( const TSQuery *self, uint32_t pattern_index, - uint32_t *length + uint32_t *step_count ); /* @@ -777,8 +887,8 @@ bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_o * numeric id based on the order that it appeared in the query's source. */ const char *ts_query_capture_name_for_id( - const TSQuery *, - uint32_t id, + const TSQuery *self, + uint32_t index, uint32_t *length ); @@ -787,14 +897,14 @@ const char *ts_query_capture_name_for_id( * with a numeric id based on the order that it appeared in the query's source. */ TSQuantifier ts_query_capture_quantifier_for_id( - const TSQuery *, - uint32_t pattern_id, - uint32_t capture_id + const TSQuery *self, + uint32_t pattern_index, + uint32_t capture_index ); const char *ts_query_string_value_for_id( - const TSQuery *, - uint32_t id, + const TSQuery *self, + uint32_t index, uint32_t *length ); @@ -805,7 +915,7 @@ const char *ts_query_string_value_for_id( * any resource usage associated with recording the capture. Currently, there * is no way to undo this. */ -void ts_query_disable_capture(TSQuery *, const char *, uint32_t); +void ts_query_disable_capture(TSQuery *self, const char *name, uint32_t length); /** * Disable a certain pattern within a query. @@ -813,41 +923,41 @@ void ts_query_disable_capture(TSQuery *, const char *, uint32_t); * This prevents the pattern from matching and removes most of the overhead * associated with the pattern. Currently, there is no way to undo this. */ -void ts_query_disable_pattern(TSQuery *, uint32_t); +void ts_query_disable_pattern(TSQuery *self, uint32_t pattern_index); /** * Create a new cursor for executing a given query. 
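Putting ts_query_new and its error out-parameters to work, a small sketch (the pattern string is arbitrary and 16 is its length; `language` is assumed; <stdio.h> for the error report):

TSQuery *compile_query(const TSLanguage *language) {
  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(language, "(identifier) @id", 16,
                                &error_offset, &error_type);
  if (!query) {
    // error_type distinguishes syntax, node-type, field, capture errors, etc.
    fprintf(stderr, "query error %d at byte %u\n", (int)error_type, error_offset);
  }
  return query;
}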
* * The cursor stores the state that is needed to iteratively search - * for matches. To use the query cursor, first call `ts_query_cursor_exec` + * for matches. To use the query cursor, first call [`ts_query_cursor_exec`] * to start running a given query on a given syntax node. Then, there are * two options for consuming the results of the query: - * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the + * 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the * *matches* in the order that they were found. Each match contains the * index of the pattern that matched, and an array of captures. Because * multiple patterns can match the same set of nodes, one match may contain * captures that appear *before* some of the captures from a previous match. - * 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the + * 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the * individual *captures* in the order that they appear. This is useful if you * don't care about which pattern matched, and just want a single ordered * sequence of captures. * * If you don't care about consuming all of the results, you can stop calling - * `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point. + * [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point. * You can then start executing another query on another node by calling - * `ts_query_cursor_exec` again. + * [`ts_query_cursor_exec`] again. */ TSQueryCursor *ts_query_cursor_new(void); /** * Delete a query cursor, freeing all of the memory that it used. */ -void ts_query_cursor_delete(TSQueryCursor *); +void ts_query_cursor_delete(TSQueryCursor *self); /** * Start running a given query on a given node. */ -void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); +void ts_query_cursor_exec(TSQueryCursor *self, const TSQuery *query, TSNode node); /** * Manage the maximum number of in-progress matches allowed by this query @@ -860,16 +970,16 @@ void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); * any number of pending matches, dynamically allocating new space for them as * needed as the query is executed. */ -bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *); -uint32_t ts_query_cursor_match_limit(const TSQueryCursor *); -void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t); +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); /** * Set the range of bytes or (row, column) positions in which the query * will be executed. */ -void ts_query_cursor_set_byte_range(TSQueryCursor *, uint32_t, uint32_t); -void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint); +void ts_query_cursor_set_byte_range(TSQueryCursor *self, uint32_t start_byte, uint32_t end_byte); +void ts_query_cursor_set_point_range(TSQueryCursor *self, TSPoint start_point, TSPoint end_point); /** * Advance to the next match of the currently running query. @@ -877,8 +987,8 @@ void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint); * If there is a match, write it to `*match` and return `true`. * Otherwise, return `false`. 
*/ -bool ts_query_cursor_next_match(TSQueryCursor *, TSQueryMatch *match); -void ts_query_cursor_remove_match(TSQueryCursor *, uint32_t id); +bool ts_query_cursor_next_match(TSQueryCursor *self, TSQueryMatch *match); +void ts_query_cursor_remove_match(TSQueryCursor *self, uint32_t match_id); /** * Advance to the next capture of the currently running query. @@ -887,24 +997,56 @@ void ts_query_cursor_remove_match(TSQueryCursor *, uint32_t id); * the match's capture list to `*capture_index`. Otherwise, return `false`. */ bool ts_query_cursor_next_capture( - TSQueryCursor *, + TSQueryCursor *self, TSQueryMatch *match, uint32_t *capture_index ); +/** + * Set the maximum start depth for a query cursor. + * + * This prevents cursors from exploring child nodes beyond a certain depth. + * Note that if a pattern includes many children, they will still be checked. + * + * A max start depth of zero can be used as a special behavior: it helps to + * destructure a subtree by staying on a node and using captures for the parts + * of interest. Note that a zero max start depth only limits the search depth + * for a pattern's root node; other nodes that are part of the pattern may + * still be matched at any depth that the pattern structure allows. + * + * Set to `UINT32_MAX` to remove the maximum start depth. + */ +void ts_query_cursor_set_max_start_depth(TSQueryCursor *self, uint32_t max_start_depth); + /**********************/ /* Section - Language */ /**********************/ +/** + * Get another reference to the given language. + */ +const TSLanguage *ts_language_copy(const TSLanguage *self); + +/** + * Free any dynamically-allocated resources for this language, if + * this is the last reference. + */ +void ts_language_delete(const TSLanguage *self); + /** * Get the number of distinct node types in the language. */ -uint32_t ts_language_symbol_count(const TSLanguage *); +uint32_t ts_language_symbol_count(const TSLanguage *self); + +/** + * Get the number of valid states in this language. +*/ +uint32_t ts_language_state_count(const TSLanguage *self); /** * Get a node type string for the given numerical id. */ -const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); +const char *ts_language_symbol_name(const TSLanguage *self, TSSymbol symbol); /** * Get the numerical id for the given node type string. @@ -919,34 +1061,177 @@ TSSymbol ts_language_symbol_for_name( /** * Get the number of distinct field names in the language. */ -uint32_t ts_language_field_count(const TSLanguage *); +uint32_t ts_language_field_count(const TSLanguage *self); /** * Get the field name string for the given numerical id. */ -const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId); +const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id); /** * Get the numerical id for the given field name string. */ -TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t); +TSFieldId ts_language_field_id_for_name(const TSLanguage *self, const char *name, uint32_t name_length); /** * Check whether the given node type id belongs to named nodes, anonymous nodes, * or hidden nodes. * - * See also `ts_node_is_named`. Hidden nodes are never returned from the API. + * See also [`ts_node_is_named`]. Hidden nodes are never returned from the API. */ -TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol); +TSSymbolType ts_language_symbol_type(const TSLanguage *self, TSSymbol symbol); /** * Get the ABI version number for this language. 
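A sketch of the match loop described above, combining ts_query_cursor_exec with ts_query_cursor_next_match (`query` and `root` are assumed to exist):

void run_query(const TSQuery *query, TSNode root) {
  TSQueryCursor *cursor = ts_query_cursor_new();
  ts_query_cursor_exec(cursor, query, root);
  TSQueryMatch match;
  while (ts_query_cursor_next_match(cursor, &match)) {
    for (uint16_t i = 0; i < match.capture_count; i++) {
      uint32_t name_len;
      const char *name = ts_query_capture_name_for_id(
        query, match.captures[i].index, &name_len);
      TSNode captured = match.captures[i].node; // the node bound to this capture
      (void)name; (void)captured;
    }
  }
  ts_query_cursor_delete(cursor);
}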
This version number is used * to ensure that languages were generated by a compatible version of * Tree-sitter. * - * See also `ts_parser_set_language`. + * See also [`ts_parser_set_language`]. */ -uint32_t ts_language_version(const TSLanguage *); +uint32_t ts_language_version(const TSLanguage *self); + +/** + * Get the next parse state. Combine this with lookahead iterators to generate + * completion suggestions or valid symbols in error nodes. Use + * [`ts_node_grammar_symbol`] for valid symbols. +*/ +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); + +/********************************/ +/* Section - Lookahead Iterator */ +/********************************/ + +/** + * Create a new lookahead iterator for the given language and parse state. + * + * This returns `NULL` if the state is invalid for the language. + * + * Repeatedly using [`ts_lookahead_iterator_next`] and + * [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the + * given parse state. Newly created lookahead iterators will contain the `ERROR` + * symbol. + * + * Lookahead iterators can be useful to generate suggestions and improve syntax + * error diagnostics. To get symbols valid in an ERROR node, use the lookahead + * iterator on its first leaf node state. For `MISSING` nodes, a lookahead + * iterator created on the previous non-extra leaf node may be appropriate. +*/ +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state); + +/** + * Delete a lookahead iterator freeing all the memory used. +*/ +void ts_lookahead_iterator_delete(TSLookaheadIterator *self); + +/** + * Reset the lookahead iterator to another state. + * + * This returns `true` if the iterator was reset to the given state and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *self, TSStateId state); + +/** + * Reset the lookahead iterator. + * + * This returns `true` if the language was set successfully and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state); + +/** + * Get the current language of the lookahead iterator. +*/ +const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self); + +/** + * Advance the lookahead iterator to the next symbol. + * + * This returns `true` if there is a new symbol and `false` otherwise. +*/ +bool ts_lookahead_iterator_next(TSLookaheadIterator *self); + +/** + * Get the current symbol of the lookahead iterator. +*/ +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self); + +/** + * Get the current symbol type of the lookahead iterator as a null-terminated + * string. +*/ +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self); + +/*************************************/ +/* Section - WebAssembly Integration */ +/*************************************/ + +typedef struct wasm_engine_t TSWasmEngine; +typedef struct TSWasmStore TSWasmStore; + +typedef enum { + TSWasmErrorKindNone = 0, + TSWasmErrorKindParse, + TSWasmErrorKindCompile, + TSWasmErrorKindInstantiate, + TSWasmErrorKindAllocate, +} TSWasmErrorKind; + +typedef struct { + TSWasmErrorKind kind; + char *message; +} TSWasmError; + +/** + * Create a Wasm store. + */ +TSWasmStore *ts_wasm_store_new( + TSWasmEngine *engine, + TSWasmError *error +); + +/** + * Free the memory associated with the given Wasm store. 
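The lookahead-iterator functions above can be combined into a completion-style sketch; `language` is assumed, and `state` might come from ts_node_parse_state on the node under the cursor (<stdio.h> assumed for output):

void print_valid_symbols(const TSLanguage *language, TSStateId state) {
  TSLookaheadIterator *lookahead = ts_lookahead_iterator_new(language, state);
  if (!lookahead) return; // the state is invalid for this language
  while (ts_lookahead_iterator_next(lookahead)) {
    // each name is a symbol that could validly appear next in `state`
    printf("%s\n", ts_lookahead_iterator_current_symbol_name(lookahead));
  }
  ts_lookahead_iterator_delete(lookahead);
}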
+ */ +void ts_wasm_store_delete(TSWasmStore *); + +/** + * Create a language from a buffer of Wasm. The resulting language behaves + * like any other Tree-sitter language, except that in order to use it with + * a parser, that parser must have a Wasm store. Note that the language + * can be used with any Wasm store, it doesn't need to be the same store that + * was used to originally load it. + */ +const TSLanguage *ts_wasm_store_load_language( + TSWasmStore *, + const char *name, + const char *wasm, + uint32_t wasm_len, + TSWasmError *error +); + +/** + * Get the number of languages instantiated in the given wasm store. + */ +size_t ts_wasm_store_language_count(const TSWasmStore *); + +/** + * Check if the language came from a Wasm module. If so, then in order to use + * this language with a Parser, that parser must have a Wasm store assigned. + */ +bool ts_language_is_wasm(const TSLanguage *); + +/** + * Assign the given Wasm store to the parser. A parser must have a Wasm store + * in order to use Wasm languages. + */ +void ts_parser_set_wasm_store(TSParser *, TSWasmStore *); + +/** + * Remove the parser's current Wasm store and return it. This returns NULL if + * the parser doesn't have a Wasm store. + */ +TSWasmStore *ts_parser_take_wasm_store(TSParser *); /**********************************/ /* Section - Global Configuration */ @@ -958,10 +1243,10 @@ uint32_t ts_language_version(const TSLanguage *); * By default, Tree-sitter uses the standard libc allocation functions, * but aborts the process when an allocation fails. This function lets * you supply alternative allocation functions at runtime. - * + * * If you pass `NULL` for any parameter, Tree-sitter will switch back to * its default implementation of that function. - * + * * If you call this function after the library has already been used, then * you must ensure that either: * 1. All the existing objects have been freed. 
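To make the allocator override described above concrete, a small sketch with logging wrappers; the wrapper names are invented, and per the documentation, passing NULL for a slot keeps the default implementation for that function:

#include <stdio.h>
#include <stdlib.h>

static void *logging_malloc(size_t size) {
  void *ptr = malloc(size);
  fprintf(stderr, "tree-sitter malloc(%zu) -> %p\n", size, ptr);
  return ptr;
}

static void logging_free(void *ptr) {
  fprintf(stderr, "tree-sitter free(%p)\n", ptr);
  free(ptr);
}

// Install before any parsers or trees exist; calloc/realloc keep defaults:
// ts_set_allocator(logging_malloc, NULL, NULL, logging_free);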
@@ -979,4 +1264,10 @@ void ts_set_allocator( } #endif +#ifndef TREE_SITTER_HIDE_SYMBOLS +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC visibility pop +#endif +#endif + #endif // TREE_SITTER_API_H_ diff --git a/lib/src/alloc.c b/lib/src/alloc.c index a5d86fc..e5cb1d5 100644 --- a/lib/src/alloc.c +++ b/lib/src/alloc.c @@ -1,11 +1,12 @@ #include "alloc.h" +#include "tree_sitter/api.h" #include static void *ts_malloc_default(size_t size) { void *result = malloc(size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); - exit(1); + abort(); } return result; } @@ -14,7 +15,7 @@ static void *ts_calloc_default(size_t count, size_t size) { void *result = calloc(count, size); if (count > 0 && !result) { fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); - exit(1); + abort(); } return result; } @@ -23,26 +24,25 @@ static void *ts_realloc_default(void *buffer, size_t size) { void *result = realloc(buffer, size); if (size > 0 && !result) { fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); - exit(1); + abort(); } return result; } // Allow clients to override allocation functions dynamically -void *(*ts_current_malloc)(size_t) = ts_malloc_default; -void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; -void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; -void (*ts_current_free)(void *) = free; +TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; +TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; +TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; +TS_PUBLIC void (*ts_current_free)(void *) = free; void ts_set_allocator( - void *(*new_malloc)(size_t), - void *(*new_calloc)(size_t, size_t), - void *(*new_realloc)(void *, size_t), - void (*new_free)(void *) + void *(*new_malloc)(size_t size), + void *(*new_calloc)(size_t count, size_t size), + void *(*new_realloc)(void *ptr, size_t size), + void (*new_free)(void *ptr) ) { ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; ts_current_free = new_free ? 
new_free : free; } - diff --git a/lib/src/alloc.h b/lib/src/alloc.h index c51f84a..a0eadb7 100644 --- a/lib/src/alloc.h +++ b/lib/src/alloc.h @@ -1,20 +1,24 @@ #ifndef TREE_SITTER_ALLOC_H_ #define TREE_SITTER_ALLOC_H_ -#include "tree_sitter/api.h" - #ifdef __cplusplus extern "C" { #endif -#include #include #include +#include + +#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) +#define TS_PUBLIC +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif -extern void *(*ts_current_malloc)(size_t); -extern void *(*ts_current_calloc)(size_t, size_t); -extern void *(*ts_current_realloc)(void *, size_t); -extern void (*ts_current_free)(void *); +TS_PUBLIC extern void *(*ts_current_malloc)(size_t); +TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t); +TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t); +TS_PUBLIC extern void (*ts_current_free)(void *); // Allow clients to override allocation functions #ifndef ts_malloc @@ -34,4 +38,4 @@ extern void (*ts_current_free)(void *); } #endif -#endif // TREE_SITTER_ALLOC_H_ +#endif // TREE_SITTER_ALLOC_H_ diff --git a/lib/src/array.h b/lib/src/array.h index abec941..15a3b23 100644 --- a/lib/src/array.h +++ b/lib/src/array.h @@ -5,12 +5,20 @@ extern "C" { #endif -#include -#include -#include +#include "./alloc.h" + #include #include -#include "./alloc.h" +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif #define Array(T) \ struct { \ @@ -19,126 +27,149 @@ extern "C" { uint32_t capacity; \ } +/// Initialize an array. #define array_init(self) \ ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) +/// Create an empty array. #define array_new() \ { NULL, 0, 0 } -#define array_get(self, index) \ - (assert((uint32_t)index < (self)->size), &(self)->contents[index]) +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) +/// Get a pointer to the first element in the array. #define array_front(self) array_get(self, 0) +/// Get a pointer to the last element in the array. #define array_back(self) array_get(self, (self)->size - 1) +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. #define array_clear(self) ((self)->size = 0) +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. #define array_reserve(self, new_capacity) \ - array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity) + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) -// Free any memory allocated for this array. -#define array_delete(self) array__delete((VoidArray *)self) +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) +/// Push a new `element` onto the end of the array. #define array_push(self, element) \ - (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ (self)->contents[(self)->size++] = (element)) -// Increase the array's size by a given number of elements, reallocating -// if necessary. New elements are zero-initialized. 
+/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. #define array_grow_by(self, count) \ - (array__grow((VoidArray *)(self), count, array__elem_size(self)), \ - memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \ - (self)->size += (count)) + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) +/// Append all elements from one array to the end of another. #define array_push_all(self, other) \ array_extend((self), (other)->size, (other)->contents) -// Append `count` elements to the end of the array, reading their values from the -// `contents` pointer. +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. #define array_extend(self, count, contents) \ - array__splice( \ - (VoidArray *)(self), array__elem_size(self), (self)->size, \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ 0, count, contents \ ) -// Remove `old_count` elements from the array starting at the given `index`. At -// the same index, insert `new_count` new elements, reading their values from the -// `new_contents` pointer. -#define array_splice(self, index, old_count, new_count, new_contents) \ - array__splice( \ - (VoidArray *)(self), array__elem_size(self), index, \ +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ old_count, new_count, new_contents \ ) -// Insert one `element` into the array at the given `index`. -#define array_insert(self, index, element) \ - array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &element) +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) -// Remove one `element` from the array at the given `index`. -#define array_erase(self, index) \ - array__erase((VoidArray *)(self), array__elem_size(self), index) +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) +/// Pop the last element off the array, returning the element by value. #define array_pop(self) ((self)->contents[--(self)->size]) +/// Assign the contents of one array to another, reallocating if necessary. #define array_assign(self, other) \ - array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self)) + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) +/// Swap one array with another #define array_swap(self, other) \ - array__swap((VoidArray *)(self), (VoidArray *)(other)) - -// Search a sorted array for a given `needle` value, using the given `compare` -// callback to determine the order. -// -// If an existing element is found to be equal to `needle`, then the `index` -// out-parameter is set to the existing value's index, and the `exists` -// out-parameter is set to true. 
Otherwise, `index` is set to an index where -// `needle` should be inserted in order to preserve the sorting, and `exists` -// is set to false. -#define array_search_sorted_with(self, compare, needle, index, exists) \ - array__search_sorted(self, 0, compare, , needle, index, exists) - -// Search a sorted array for a given `needle` value, using integer comparisons -// of a given struct field (specified with a leading dot) to determine the order. -// -// See also `array_search_sorted_with`. -#define array_search_sorted_by(self, field, needle, index, exists) \ - array__search_sorted(self, 0, _compare_int, field, needle, index, exists) - -// Insert a given `value` into a sorted array, using the given `compare` -// callback to determine the order. + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. #define array_insert_sorted_with(self, compare, value) \ do { \ - unsigned index, exists; \ - array_search_sorted_with(self, compare, &(value), &index, &exists); \ - if (!exists) array_insert(self, index, value); \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ } while (0) -// Insert a given `value` into a sorted array, using integer comparisons of -// a given struct field (specified with a leading dot) to determine the order. -// -// See also `array_search_sorted_by`. +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. #define array_insert_sorted_by(self, field, value) \ do { \ - unsigned index, exists; \ - array_search_sorted_by(self, field, (value) field, &index, &exists); \ - if (!exists) array_insert(self, index, value); \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ } while (0) // Private -typedef Array(void) VoidArray; +typedef Array(void) Array; -#define array__elem_size(self) sizeof(*(self)->contents) - -static inline void array__delete(VoidArray *self) { - ts_free(self->contents); - self->contents = NULL; - self->size = 0; - self->capacity = 0; +/// This is not what you're looking for, see `array_delete`. 
+static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } } -static inline void array__erase(VoidArray *self, size_t element_size, +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, uint32_t index) { assert(index < self->size); char *contents = (char *)self->contents; @@ -147,7 +178,8 @@ static inline void array__erase(VoidArray *self, size_t element_size, self->size--; } -static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) { +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { if (new_capacity > self->capacity) { if (self->contents) { self->contents = ts_realloc(self->contents, new_capacity * element_size); @@ -158,29 +190,33 @@ static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t } } -static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) { - array__reserve(self, element_size, other->size); +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); self->size = other->size; memcpy(self->contents, other->contents, self->size * element_size); } -static inline void array__swap(VoidArray *self, VoidArray *other) { - VoidArray swap = *other; +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; *other = *self; *self = swap; } -static inline void array__grow(VoidArray *self, uint32_t count, size_t element_size) { +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { uint32_t new_size = self->size + count; if (new_size > self->capacity) { uint32_t new_capacity = self->capacity * 2; if (new_capacity < 8) new_capacity = 8; if (new_capacity < new_size) new_capacity = new_size; - array__reserve(self, element_size, new_capacity); + _array__reserve(self, element_size, new_capacity); } } -static inline void array__splice(VoidArray *self, size_t element_size, +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, uint32_t index, uint32_t old_count, uint32_t new_count, const void *elements) { uint32_t new_size = self->size + new_count - old_count; @@ -188,7 +224,7 @@ static inline void array__splice(VoidArray *self, size_t element_size, uint32_t new_end = index + new_count; assert(old_end <= self->size); - array__reserve(self, element_size, new_size); + _array__reserve(self, element_size, new_size); char *contents = (char *)self->contents; if (self->size > old_end) { @@ -216,30 +252,37 @@ static inline void array__splice(VoidArray *self, size_t element_size, self->size += new_count - old_count; } -// A binary search routine, based on Rust's `std::slice::binary_search_by`. -#define array__search_sorted(self, start, compare, suffix, needle, index, exists) \ +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
+#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ do { \ - *(index) = start; \ - *(exists) = false; \ - uint32_t size = (self)->size - *(index); \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ if (size == 0) break; \ int comparison; \ while (size > 1) { \ uint32_t half_size = size / 2; \ - uint32_t mid_index = *(index) + half_size; \ + uint32_t mid_index = *(_index) + half_size; \ comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ - if (comparison <= 0) *(index) = mid_index; \ + if (comparison <= 0) *(_index) = mid_index; \ size -= half_size; \ } \ - comparison = compare(&((self)->contents[*(index)] suffix), (needle)); \ - if (comparison == 0) *(exists) = true; \ - else if (comparison < 0) *(index) += 1; \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ } while (0) -// Helper macro for the `_sorted_by` routines below. This takes the left (existing) -// parameter by reference in order to work with the generic sorting function above. +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. #define _compare_int(a, b) ((int)*(a) - (int)(b)) +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif diff --git a/lib/src/atomic.h b/lib/src/atomic.h index 1657324..e680b60 100644 --- a/lib/src/atomic.h +++ b/lib/src/atomic.h @@ -1,7 +1,9 @@ #ifndef TREE_SITTER_ATOMIC_H_ #define TREE_SITTER_ATOMIC_H_ +#include <stdbool.h> #include <stddef.h> +#include <stdint.h> #ifdef __TINYC__ @@ -46,11 +48,19 @@ static inline size_t atomic_load(const volatile size_t *p) { } static inline uint32_t atomic_inc(volatile uint32_t *p) { - return __sync_add_and_fetch(p, 1u); + #ifdef __ATOMIC_RELAXED + return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST); + #else + return __sync_add_and_fetch(p, 1U); + #endif } static inline uint32_t atomic_dec(volatile uint32_t *p) { - return __sync_sub_and_fetch(p, 1u); + #ifdef __ATOMIC_RELAXED + return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST); + #else + return __sync_sub_and_fetch(p, 1U); + #endif } #endif diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 18a4241..bcf8da9 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -210,7 +210,7 @@ static void iterator_ascend(Iterator *self) { static bool iterator_descend(Iterator *self, uint32_t goal_position) { if (self->in_padding) return false; - bool did_descend; + bool did_descend = false; do { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); diff --git a/lib/src/language.c b/lib/src/language.c index d0b497d..d49907f 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -1,12 +1,29 @@ #include "./language.h" -#include "./subtree.h" -#include "./error_costs.h" +#include "./wasm_store.h" +#include "tree_sitter/api.h" #include <string.h> +const TSLanguage *ts_language_copy(const TSLanguage *self) { + if (self && ts_language_is_wasm(self)) { + ts_wasm_language_retain(self); + } + return self; +} + +void ts_language_delete(const TSLanguage *self) { + if (self && ts_language_is_wasm(self)) { + ts_wasm_language_release(self); + } +} + uint32_t ts_language_symbol_count(const TSLanguage *self) { return self->symbol_count + self->alias_count; } +uint32_t
ts_language_state_count(const TSLanguage *self) { + return self->state_count; +} + uint32_t ts_language_version(const TSLanguage *self) { return self->version; } @@ -56,6 +73,28 @@ TSSymbol ts_language_public_symbol( return self->public_symbol_map[symbol]; } +TSStateId ts_language_next_state( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + return 0; + } else if (symbol < self->token_count) { + uint32_t count; + const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); + if (count > 0) { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift) { + return action.shift.extra ? state : action.shift.state; + } + } + return 0; + } else { + return ts_language_lookup(self, state, symbol); + } +} + const char *ts_language_symbol_name( const TSLanguage *self, TSSymbol symbol @@ -78,7 +117,7 @@ TSSymbol ts_language_symbol_for_name( bool is_named ) { if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; - uint32_t count = ts_language_symbol_count(self); + uint16_t count = (uint16_t)ts_language_symbol_count(self); for (TSSymbol i = 0; i < count; i++) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; @@ -121,7 +160,7 @@ TSFieldId ts_language_field_id_for_name( const char *name, uint32_t name_length ) { - uint32_t count = ts_language_field_count(self); + uint16_t count = (uint16_t)ts_language_field_count(self); for (TSSymbol i = 1; i < count + 1; i++) { switch (strncmp(name, self->field_names[i], name_length)) { case 0: @@ -135,3 +174,48 @@ TSFieldId ts_language_field_id_for_name( } return 0; } + +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { + if (state >= self->state_count) return NULL; + LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); + *iterator = ts_language_lookaheads(self, state); + return (TSLookaheadIterator *)iterator; +} + +void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { + ts_free(self); +} + +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + if (state >= iterator->language->state_count) return false; + *iterator = ts_language_lookaheads(iterator->language, state); + return true; +} + +const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->language; +} + +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { + if (state >= language->state_count) return false; + LookaheadIterator *iterator = (LookaheadIterator *)self; + *iterator = ts_language_lookaheads(language, state); + return true; +} + +bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + return ts_lookahead_iterator__next(iterator); +} + +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->symbol; +} + +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return ts_language_symbol_name(iterator->language, iterator->symbol); +} diff 
--git a/lib/src/language.h b/lib/src/language.h index 7234685..4e2769b 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -6,10 +6,13 @@ extern "C" { #endif #include "./subtree.h" -#include "tree_sitter/parser.h" +#include "./parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) +#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 +#define LANGUAGE_VERSION_USABLE_VIA_WASM 13 + typedef struct { const TSParseAction *actions; uint32_t action_count; @@ -38,6 +41,8 @@ TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); + static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) { return 0 < symbol && symbol < self->external_token_count + 1; } @@ -83,7 +88,7 @@ static inline uint16_t ts_language_lookup( for (unsigned i = 0; i < group_count; i++) { uint16_t section_value = *(data++); uint16_t symbol_count = *(data++); - for (unsigned i = 0; i < symbol_count; i++) { + for (unsigned j = 0; j < symbol_count; j++) { if (*(data++) == symbol) return section_value; } } @@ -134,7 +139,7 @@ static inline LookaheadIterator ts_language_lookaheads( }; } -static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) { +static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { // For small parse states, valid symbols are listed explicitly, // grouped by their value. There's no need to look up the actions // again until moving to the next group. @@ -178,35 +183,13 @@ static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) { return true; } -static inline TSStateId ts_language_next_state( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - return 0; - } else if (symbol < self->token_count) { - uint32_t count; - const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); - if (count > 0) { - TSParseAction action = actions[count - 1]; - if (action.type == TSParseActionTypeShift) { - return action.shift.extra ? state : action.shift.state; - } - } - return 0; - } else { - return ts_language_lookup(self, state, symbol); - } -} - // Whether the state is a "primary state". If this returns false, it indicates that there exists // another state that behaves identically to this one with respect to query analysis. 
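With the internal helper renamed to `ts_lookahead_iterator__next`, the public name is freed up for the `TSLookaheadIterator` API added in language.c above. A minimal sketch of enumerating the symbols that are valid in a given parse state; `language` and `state` are assumed to come from elsewhere:

#include <stdio.h>
#include "tree_sitter/api.h"

static void print_valid_symbols(const TSLanguage *language, TSStateId state) {
  TSLookaheadIterator *lookahead = ts_lookahead_iterator_new(language, state);
  if (!lookahead) return;  // `state` was out of range for this language
  while (ts_lookahead_iterator_next(lookahead)) {
    printf("%s\n", ts_lookahead_iterator_current_symbol_name(lookahead));
  }
  ts_lookahead_iterator_delete(lookahead);
}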
static inline bool ts_language_state_is_primary( const TSLanguage *self, TSStateId state ) { - if (self->version >= 14) { + if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { return state == self->primary_state_ids[state]; } else { return true; @@ -269,17 +252,17 @@ static inline void ts_language_aliases_for_symbol( *start = &self->public_symbol_map[original_symbol]; *end = *start + 1; - unsigned i = 0; + unsigned idx = 0; for (;;) { - TSSymbol symbol = self->alias_map[i++]; + TSSymbol symbol = self->alias_map[idx++]; if (symbol == 0 || symbol > original_symbol) break; - uint16_t count = self->alias_map[i++]; + uint16_t count = self->alias_map[idx++]; if (symbol == original_symbol) { - *start = &self->alias_map[i]; - *end = &self->alias_map[i + count]; + *start = &self->alias_map[idx]; + *end = &self->alias_map[idx + count]; break; } - i += count; + idx += count; } } @@ -289,21 +272,21 @@ static inline void ts_language_write_symbol_as_dot_string( TSSymbol symbol ) { const char *name = ts_language_symbol_name(self, symbol); - for (const char *c = name; *c; c++) { - switch (*c) { + for (const char *chr = name; *chr; chr++) { + switch (*chr) { case '"': case '\\': fputc('\\', f); - fputc(*c, f); + fputc(*chr, f); break; case '\n': fputs("\\n", f); break; case '\t': - fputs("\\n", f); + fputs("\\t", f); break; default: - fputc(*c, f); + fputc(*chr, f); break; } } diff --git a/lib/src/lexer.c b/lib/src/lexer.c index acaf3e8..b32a920 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -172,7 +172,9 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) { self->current_position.bytes >= current_range->end_byte || current_range->end_byte == current_range->start_byte ) { - self->current_included_range_index++; + if (self->current_included_range_index < self->included_range_count) { + self->current_included_range_index++; + } if (self->current_included_range_index < self->included_range_count) { current_range++; self->current_position = (Length) { @@ -209,11 +211,11 @@ static void ts_lexer__advance(TSLexer *_self, bool skip) { if (!self->chunk) return; if (skip) { - LOG("skip", self->data.lookahead); + LOG("skip", self->data.lookahead) } else { - LOG("consume", self->data.lookahead); + LOG("consume", self->data.lookahead) } - + ts_lexer__do_advance(self, skip); } @@ -245,9 +247,9 @@ static void ts_lexer__mark_end(TSLexer *_self) { static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; - + uint32_t goal_byte = self->current_position.bytes; - + self->did_get_column = true; self->current_position.bytes -= self->current_position.extent.column; self->current_position.extent.column = 0; @@ -257,10 +259,13 @@ static uint32_t ts_lexer__get_column(TSLexer *_self) { } uint32_t result = 0; - ts_lexer__get_lookahead(self); - while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) { - ts_lexer__do_advance(self, false); - result++; + if (!ts_lexer__eof(_self)) { + ts_lexer__get_lookahead(self); + while (self->current_position.bytes < goal_byte && self->chunk) { + result++; + ts_lexer__do_advance(self, false); + if (ts_lexer__eof(_self)) break; + } } return result; @@ -360,7 +365,7 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { // Therefore, the next byte *after* the current (invalid) character // affects the interpretation of the current character. 
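`ts_lexer__get_column` backs the `get_column` callback that external scanners reach through `TSLexer`; after this change it re-reads the current line only when the lexer has not already hit EOF. An illustrative fragment of how a scanner might use that callback (the indentation check is hypothetical, not part of this patch):

#include "tree_sitter/parser.h"

// True when the lexer sits at the first column of a line, the typical
// get_column use in indentation-sensitive external scanners.
static bool at_line_start(TSLexer *lexer) {
  return lexer->get_column(lexer) == 0;
}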
if (self->data.lookahead == TS_DECODE_ERROR) { - current_lookahead_end_byte++; + current_lookahead_end_byte += 4; // the maximum number of bytes read to identify an invalid code point } if (current_lookahead_end_byte > *lookahead_end_byte) { diff --git a/lib/src/lexer.h b/lib/src/lexer.h index c1a5bfd..445c4fd 100644 --- a/lib/src/lexer.h +++ b/lib/src/lexer.h @@ -8,7 +8,7 @@ extern "C" { #include "./length.h" #include "./subtree.h" #include "tree_sitter/api.h" -#include "tree_sitter/parser.h" +#include "./parser.h" typedef struct { TSLexer data; diff --git a/lib/src/lib.c b/lib/src/lib.c index 5aab20d..70671ee 100644 --- a/lib/src/lib.c +++ b/lib/src/lib.c @@ -1,8 +1,3 @@ -// The Tree-sitter library can be built by compiling this one source file. -// -// The following directories must be added to the include path: -// - include - #define _POSIX_C_SOURCE 200112L #include "./alloc.c" @@ -16,3 +11,4 @@ #include "./subtree.c" #include "./tree_cursor.c" #include "./tree.c" +#include "./wasm_store.c" diff --git a/lib/src/node.c b/lib/src/node.c index de59504..1c0eea7 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -237,6 +237,8 @@ static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) return earlier_node; } else { node = earlier_node; + earlier_node = ts_node__null(); + earlier_node_is_relevant = false; } } @@ -423,8 +425,28 @@ const char *ts_node_type(TSNode self) { return ts_language_symbol_name(self.tree->language, symbol); } +const TSLanguage *ts_node_language(TSNode self) { + return self.tree->language; +} + +TSSymbol ts_node_grammar_symbol(TSNode self) { + return ts_subtree_symbol(ts_node__subtree(self)); +} + +const char *ts_node_grammar_type(TSNode self) { + TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + char *ts_node_string(TSNode self) { - return ts_subtree_string(ts_node__subtree(self), self.tree->language, false); + TSSymbol alias_symbol = ts_node__alias(&self); + return ts_subtree_string( + ts_node__subtree(self), + alias_symbol, + ts_language_symbol_metadata(self.tree->language, alias_symbol).visible, + self.tree->language, + false + ); } bool ts_node_eq(TSNode self, TSNode other) { @@ -458,35 +480,60 @@ bool ts_node_has_error(TSNode self) { return ts_subtree_error_cost(ts_node__subtree(self)) > 0; } +bool ts_node_is_error(TSNode self) { + TSSymbol symbol = ts_node_symbol(self); + return symbol == ts_builtin_sym_error; +} + +uint32_t ts_node_descendant_count(TSNode self) { + return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; +} + +TSStateId ts_node_parse_state(TSNode self) { + return ts_subtree_parse_state(ts_node__subtree(self)); +} + +TSStateId ts_node_next_parse_state(TSNode self) { + const TSLanguage *language = self.tree->language; + uint16_t state = ts_node_parse_state(self); + if (state == TS_TREE_STATE_NONE) { + return TS_TREE_STATE_NONE; + } + uint16_t symbol = ts_node_grammar_symbol(self); + return ts_language_next_state(language, state, symbol); +} + TSNode ts_node_parent(TSNode self) { TSNode node = ts_tree_root_node(self.tree); - uint32_t end_byte = ts_node_end_byte(self); if (node.id == self.id) return ts_node__null(); - TSNode last_visible_node = node; - bool did_descend = true; - while (did_descend) { - did_descend = false; + while (true) { + TSNode next_node = ts_node_child_containing_descendant(node, self); + if (ts_node_is_null(next_node)) break; + node = next_node; + } - TSNode child; - NodeChildIterator iterator = 
ts_node_iterate_children(&node); - while (ts_node_child_iterator_next(&iterator, &child)) { + return node; +} + +TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) { + uint32_t start_byte = ts_node_start_byte(subnode); + uint32_t end_byte = ts_node_end_byte(subnode); + + do { + NodeChildIterator iter = ts_node_iterate_children(&self); + do { if ( - ts_node_start_byte(child) > ts_node_start_byte(self) || - child.id == self.id - ) break; - if (iterator.position.bytes >= end_byte) { - node = child; - if (ts_node__is_relevant(child, true)) { - last_visible_node = node; - } - did_descend = true; - break; + !ts_node_child_iterator_next(&iter, &self) + || ts_node_start_byte(self) > start_byte + || self.id == subnode.id + ) { + return ts_node__null(); } - } - } + } while (iter.position.bytes < end_byte || ts_node_child_count(self) == 0); + } while (!ts_node__is_relevant(self, true)); - return last_visible_node; + return self; } TSNode ts_node_child(TSNode self, uint32_t child_index) { @@ -569,24 +616,61 @@ TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { return ts_node__null(); } -const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { - const TSFieldMapEntry *field_map_start = NULL, *field_map_end = NULL; - if (!ts_node_child_count(self)) { +static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) { + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map, + &field_map_end + ); + for (; field_map != field_map_end; field_map++) { + if (!field_map->inherited && field_map->child_index == structural_child_index) { + return self.tree->language->field_names[field_map->field_id]; + } + } return NULL; - } +} - ts_language_field_map( - self.tree->language, - ts_node__subtree(self).ptr->production_id, - &field_map_start, - &field_map_end - ); +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { + TSNode result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; + + while (did_descend) { + did_descend = false; + + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, true)) { + if (index == child_index) { + if (ts_node_is_extra(child)) { + return NULL; + } + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) return field_name; + return inherited_field_name; + } + index++; + } else { + uint32_t grandchild_index = child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, true); + if (grandchild_index < grandchild_count) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) inherited_field_name = field_name; - for (const TSFieldMapEntry *i = field_map_start; i < field_map_end; i++) { - if (i->child_index == child_index) { - return self.tree->language->field_names[i->field_id]; + did_descend = true; + result = child; + child_index = grandchild_index; + break; + } + index += grandchild_count; + } } } + return NULL; } diff --git a/lib/src/parser.c b/lib/src/parser.c index f84b3c8..4d64f37 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1,8 +1,11 @@ +#define _POSIX_C_SOURCE 200112L + #include #include #include #include #include 
+#include #include "tree_sitter/api.h" #include "./alloc.h" #include "./array.h" @@ -18,6 +21,7 @@ #include "./stack.h" #include "./subtree.h" #include "./tree.h" +#include "./wasm_store.h" #define LOG(...) \ if (self->lexer.logger.log || self->dot_graph_file) { \ @@ -88,6 +92,7 @@ struct TSParser { Stack *stack; SubtreePool tree_pool; const TSLanguage *language; + TSWasmStore *wasm_store; ReduceActionSet reduce_actions; Subtree finished_tree; SubtreeArray trailing_extras; @@ -105,6 +110,7 @@ struct TSParser { Subtree old_tree; TSRangeArray included_range_differences; unsigned included_range_difference_index; + bool has_scanner_error; }; typedef struct { @@ -132,10 +138,10 @@ typedef struct { static const char *ts_string_input_read( void *_self, uint32_t byte, - TSPoint pt, + TSPoint point, uint32_t *length ) { - (void)pt; + (void)point; TSStringInput *self = (TSStringInput *)_self; if (byte >= self->length) { *length = 0; @@ -159,9 +165,9 @@ static void ts_parser__log(TSParser *self) { if (self->dot_graph_file) { fprintf(self->dot_graph_file, "graph {\nlabel=\""); - for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) { - if (*c == '"' || *c == '\\') fputc('\\', self->dot_graph_file); - fputc(*c, self->dot_graph_file); + for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) { + if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file); + fputc(*chr, self->dot_graph_file); } fprintf(self->dot_graph_file, "\"\n}\n\n"); } @@ -332,18 +338,128 @@ static bool ts_parser__better_version_exists( return false; } -static void ts_parser__restore_external_scanner( +static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) { + if (ts_language_is_wasm(self->language)) { + return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state); + } else { + return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); + } +} + +static bool ts_parser__call_keyword_lex_fn(TSParser *self, TSLexMode lex_mode) { + if (ts_language_is_wasm(self->language)) { + return ts_wasm_store_call_lex_keyword(self->wasm_store, 0); + } else { + return self->language->keyword_lex_fn(&self->lexer.data, 0); + } +} + +static void ts_parser__external_scanner_create( + TSParser *self +) { + if (self->language && self->language->external_scanner.states) { + if (ts_language_is_wasm(self->language)) { + self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create( + self->wasm_store + ); + if (ts_wasm_store_has_error(self->wasm_store)) { + self->has_scanner_error = true; + } + } else if (self->language->external_scanner.create) { + self->external_scanner_payload = self->language->external_scanner.create(); + } + } +} + +static void ts_parser__external_scanner_destroy( + TSParser *self +) { + if ( + self->language && + self->external_scanner_payload && + self->language->external_scanner.destroy && + !ts_language_is_wasm(self->language) + ) { + self->language->external_scanner.destroy( + self->external_scanner_payload + ); + } + self->external_scanner_payload = NULL; +} + +static unsigned ts_parser__external_scanner_serialize( + TSParser *self +) { + if (ts_language_is_wasm(self->language)) { + return ts_wasm_store_call_scanner_serialize( + self->wasm_store, + (uintptr_t)self->external_scanner_payload, + self->lexer.debug_buffer + ); + } else { + uint32_t length = self->language->external_scanner.serialize( + self->external_scanner_payload, + self->lexer.debug_buffer + ); + assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE); + return length; + } +} + 
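These wrappers normalize the two scanner ABIs: native scanners are reached through the `external_scanner` function pointers, wasm scanners through the store. For reference, a minimal native scanner satisfying that interface could look like the following sketch, where `mylang` is a placeholder grammar name:

#include "tree_sitter/parser.h"

void *tree_sitter_mylang_external_scanner_create(void) {
  return NULL;  // this scanner keeps no state between tokens
}

void tree_sitter_mylang_external_scanner_destroy(void *payload) {}

unsigned tree_sitter_mylang_external_scanner_serialize(void *payload, char *buffer) {
  return 0;  // nothing to persist; must never exceed TREE_SITTER_SERIALIZATION_BUFFER_SIZE
}

void tree_sitter_mylang_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {}

bool tree_sitter_mylang_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
  return false;  // a real scanner would set lexer->result_symbol and return true
}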
+static void ts_parser__external_scanner_deserialize( TSParser *self, Subtree external_token ) { + const char *data = NULL; + uint32_t length = 0; if (external_token.ptr) { + data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state); + length = external_token.ptr->external_scanner_state.length; + } + + if (ts_language_is_wasm(self->language)) { + ts_wasm_store_call_scanner_deserialize( + self->wasm_store, + (uintptr_t)self->external_scanner_payload, + data, + length + ); + if (ts_wasm_store_has_error(self->wasm_store)) { + self->has_scanner_error = true; + } + } else { self->language->external_scanner.deserialize( self->external_scanner_payload, - ts_external_scanner_state_data(&external_token.ptr->external_scanner_state), - external_token.ptr->external_scanner_state.length + data, + length ); + } +} + +static bool ts_parser__external_scanner_scan( + TSParser *self, + TSStateId external_lex_state +) { + if (ts_language_is_wasm(self->language)) { + bool result = ts_wasm_store_call_scanner_scan( + self->wasm_store, + (uintptr_t)self->external_scanner_payload, + external_lex_state * self->language->external_token_count + ); + if (ts_wasm_store_has_error(self->wasm_store)) { + self->has_scanner_error = true; + } + return result; } else { - self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); + const bool *valid_external_tokens = ts_language_enabled_external_tokens( + self->language, + external_lex_state + ); + return self->language->external_scanner.scan( + self->external_scanner_payload, + &self->lexer.data, + valid_external_tokens + ); } } @@ -395,10 +511,6 @@ static Subtree ts_parser__lex( const Length start_position = ts_stack_position(self->stack, version); const Subtree external_token = ts_stack_last_external_token(self->stack, version); - const bool *valid_external_tokens = ts_language_enabled_external_tokens( - self->language, - lex_mode.external_lex_state - ); bool found_external_token = false; bool error_mode = parse_state == ERROR_STATE; @@ -413,9 +525,10 @@ static Subtree ts_parser__lex( ts_lexer_reset(&self->lexer, start_position); for (;;) { + bool found_token = false; Length current_position = self->lexer.current_position; - if (valid_external_tokens) { + if (lex_mode.external_lex_state != 0) { LOG( "lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state, @@ -423,19 +536,13 @@ static Subtree ts_parser__lex( current_position.extent.column ); ts_lexer_start(&self->lexer); - ts_parser__restore_external_scanner(self, external_token); - bool found_token = self->language->external_scanner.scan( - self->external_scanner_payload, - &self->lexer.data, - valid_external_tokens - ); + ts_parser__external_scanner_deserialize(self, external_token); + found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state); + if (self->has_scanner_error) return NULL_SUBTREE; ts_lexer_finish(&self->lexer, &lookahead_end_byte); if (found_token) { - external_scanner_state_len = self->language->external_scanner.serialize( - self->external_scanner_payload, - self->lexer.debug_buffer - ); + external_scanner_state_len = ts_parser__external_scanner_serialize(self); external_scanner_state_changed = !ts_external_scanner_state_eq( ts_subtree_external_scanner_state(external_token), self->lexer.debug_buffer, @@ -482,17 +589,13 @@ static Subtree ts_parser__lex( current_position.extent.column ); ts_lexer_start(&self->lexer); - bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state); + found_token = 
ts_parser__call_main_lex_fn(self, lex_mode); ts_lexer_finish(&self->lexer, &lookahead_end_byte); if (found_token) break; if (!error_mode) { error_mode = true; lex_mode = self->language->lex_modes[ERROR_STATE]; - valid_external_tokens = ts_language_enabled_external_tokens( - self->language, - lex_mode.external_lex_state - ); ts_lexer_reset(&self->lexer, start_position); continue; } @@ -543,12 +646,14 @@ static Subtree ts_parser__lex( uint32_t end_byte = self->lexer.token_end_position.bytes; ts_lexer_reset(&self->lexer, self->lexer.token_start_position); ts_lexer_start(&self->lexer); + + is_keyword = ts_parser__call_keyword_lex_fn(self, lex_mode); + if ( - self->language->keyword_lex_fn(&self->lexer.data, 0) && + is_keyword && self->lexer.token_end_position.bytes == end_byte && ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) ) { - is_keyword = true; symbol = self->lexer.data.result_symbol; } } @@ -732,14 +837,14 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) } if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) { - LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u", + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left), ts_subtree_dynamic_precedence(left)); return true; } if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) { - LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u", + LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32, TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right), ts_subtree_dynamic_precedence(right)); return false; @@ -747,7 +852,7 @@ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) if (ts_subtree_error_cost(left) > 0) return true; - int comparison = ts_subtree_compare(left, right); + int comparison = ts_subtree_compare(left, right, &self->tree_pool); switch (comparison) { case -1: LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right)); @@ -871,19 +976,19 @@ static StackVersion ts_parser__reduce( if (next_slice.version != slice.version) break; i++; - SubtreeArray children = next_slice.subtrees; - ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras2); + SubtreeArray next_slice_children = next_slice.subtrees; + ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2); if (ts_parser__select_children( self, ts_subtree_from_mut(parent), - &children + &next_slice_children )) { ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras); ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent)); array_swap(&self->trailing_extras, &self->trailing_extras2); parent = ts_subtree_new_node( - symbol, &children, production_id, self->language + symbol, &next_slice_children, production_id, self->language ); } else { array_clear(&self->trailing_extras2); @@ -994,8 +1099,8 @@ static bool ts_parser__do_all_potential_reductions( if (version >= version_count) break; bool merged = false; - for (StackVersion i = initial_version_count; i < version; i++) { - if (ts_stack_merge(self->stack, i, version)) { + for (StackVersion j = initial_version_count; j < version; j++) { + if (ts_stack_merge(self->stack, j, version)) { merged = true; break; } @@ -1018,8 +1123,8 @@ static bool 
ts_parser__do_all_potential_reductions( for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) { TableEntry entry; ts_language_table_entry(self->language, state, symbol, &entry); - for (uint32_t i = 0; i < entry.action_count; i++) { - TSParseAction action = entry.actions[i]; + for (uint32_t j = 0; j < entry.action_count; j++) { + TSParseAction action = entry.actions[j]; switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: @@ -1041,8 +1146,8 @@ static bool ts_parser__do_all_potential_reductions( } StackVersion reduction_version = STACK_VERSION_NONE; - for (uint32_t i = 0; i < self->reduce_actions.size; i++) { - ReduceAction action = self->reduce_actions.contents[i]; + for (uint32_t j = 0; j < self->reduce_actions.size; j++) { + ReduceAction action = self->reduce_actions.contents[j]; reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, @@ -1328,7 +1433,7 @@ static void ts_parser__handle_error( TSStateId state = ts_stack_state(self->stack, v); for ( TSSymbol missing_symbol = 1; - missing_symbol < self->language->token_count; + missing_symbol < (uint16_t)self->language->token_count; missing_symbol++ ) { TSStateId state_after_missing_symbol = ts_language_next_state( @@ -1392,7 +1497,7 @@ static void ts_parser__handle_error( ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH); // Begin recovery with the current lookahead node, rather than waiting for the - // next turn of the parse loop. This ensures that the tree accounts for the the + // next turn of the parse loop. This ensures that the tree accounts for the // current lookahead token's "lookahead bytes" value, which describes how far // the lexer needed to look ahead beyond the content of the token in order to // recognize it. @@ -1439,6 +1544,7 @@ static bool ts_parser__advance( if (needs_lex) { needs_lex = false; lookahead = ts_parser__lex(self, version, state); + if (self->has_scanner_error) return false; if (lookahead.ptr) { ts_parser__set_cached_token(self, position, last_external_token, lookahead); @@ -1682,7 +1788,7 @@ static unsigned ts_parser__condense_stack(TSParser *self) { } } - // Enfore a hard upper bound on the number of stack versions by + // Enforce a hard upper bound on the number of stack versions by // discarding the least promising versions. 
while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) { ts_stack_remove_version(self->stack, MAX_VERSION_COUNT); @@ -1723,6 +1829,7 @@ static unsigned ts_parser__condense_stack(TSParser *self) { static bool ts_parser_has_outstanding_parse(TSParser *self) { return ( + self->external_scanner_payload || ts_stack_state(self->stack, 0) != 1 || ts_stack_node_count_since_error(self->stack, 0) != 0 ); @@ -1742,6 +1849,9 @@ TSParser *ts_parser_new(void) { self->dot_graph_file = NULL; self->cancellation_flag = NULL; self->timeout_duration = 0; + self->language = NULL; + self->has_scanner_error = false; + self->external_scanner_payload = NULL; self->end_clock = clock_null(); self->operation_count = 0; self->old_tree = NULL_SUBTREE; @@ -1766,6 +1876,7 @@ void ts_parser_delete(TSParser *self) { ts_subtree_release(&self->tree_pool, self->old_tree); self->old_tree = NULL_SUBTREE; } + ts_wasm_store_delete(self->wasm_store); ts_lexer_delete(&self->lexer); ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE); ts_subtree_pool_delete(&self->tree_pool); @@ -1781,23 +1892,25 @@ const TSLanguage *ts_parser_language(const TSParser *self) { } bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { - if (language) { - if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false; - if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false; - } + ts_parser_reset(self); + ts_language_delete(self->language); + self->language = NULL; - if (self->external_scanner_payload && self->language->external_scanner.destroy) { - self->language->external_scanner.destroy(self->external_scanner_payload); - } + if (language) { + if ( + language->version > TREE_SITTER_LANGUAGE_VERSION || + language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + ) return false; - if (language && language->external_scanner.create) { - self->external_scanner_payload = language->external_scanner.create(); - } else { - self->external_scanner_payload = NULL; + if (ts_language_is_wasm(language)) { + if ( + !self->wasm_store || + !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language) + ) return false; + } } - self->language = language; - ts_parser_reset(self); + self->language = ts_language_copy(language); return true; } @@ -1815,7 +1928,11 @@ void ts_parser_print_dot_graphs(TSParser *self, int fd) { } if (fd >= 0) { + #ifdef _WIN32 + self->dot_graph_file = _fdopen(fd, "a"); + #else self->dot_graph_file = fdopen(fd, "a"); + #endif } else { self->dot_graph_file = NULL; } @@ -1850,8 +1967,9 @@ const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) } void ts_parser_reset(TSParser *self) { - if (self->language && self->language->external_scanner.deserialize) { - self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0); + ts_parser__external_scanner_destroy(self); + if (self->wasm_store) { + ts_wasm_store_reset(self->wasm_store); } if (self->old_tree.ptr) { @@ -1868,6 +1986,7 @@ void ts_parser_reset(TSParser *self) { self->finished_tree = NULL_SUBTREE; } self->accept_count = 0; + self->has_scanner_error = false; } TSTree *ts_parser_parse( @@ -1875,33 +1994,43 @@ TSTree *ts_parser_parse( const TSTree *old_tree, TSInput input ) { + TSTree *result = NULL; if (!self->language || !input.read) return NULL; - ts_lexer_set_input(&self->lexer, input); + if (ts_language_is_wasm(self->language)) { + if (!self->wasm_store) return NULL; + ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language); + } + 
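  // For a wasm language, the store has just been (re)started above: every lex
  // and external-scanner call below is dispatched through self->wasm_store
  // rather than through the language's native function pointers.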
ts_lexer_set_input(&self->lexer, input); array_clear(&self->included_range_differences); self->included_range_difference_index = 0; if (ts_parser_has_outstanding_parse(self)) { LOG("resume_parsing"); - } else if (old_tree) { - ts_subtree_retain(old_tree->root); - self->old_tree = old_tree->root; - ts_range_array_get_changed_ranges( - old_tree->included_ranges, old_tree->included_range_count, - self->lexer.included_ranges, self->lexer.included_range_count, - &self->included_range_differences - ); - reusable_node_reset(&self->reusable_node, old_tree->root); - LOG("parse_after_edit"); - LOG_TREE(self->old_tree); - for (unsigned i = 0; i < self->included_range_differences.size; i++) { - TSRange *range = &self->included_range_differences.contents[i]; - LOG("different_included_range %u - %u", range->start_byte, range->end_byte); - } } else { - reusable_node_clear(&self->reusable_node); - LOG("new_parse"); + ts_parser__external_scanner_create(self); + if (self->has_scanner_error) goto exit; + + if (old_tree) { + ts_subtree_retain(old_tree->root); + self->old_tree = old_tree->root; + ts_range_array_get_changed_ranges( + old_tree->included_ranges, old_tree->included_range_count, + self->lexer.included_ranges, self->lexer.included_range_count, + &self->included_range_differences + ); + reusable_node_reset(&self->reusable_node, old_tree->root); + LOG("parse_after_edit"); + LOG_TREE(self->old_tree); + for (unsigned i = 0; i < self->included_range_differences.size; i++) { + TSRange *range = &self->included_range_differences.contents[i]; + LOG("different_included_range %u - %u", range->start_byte, range->end_byte); + } + } else { + reusable_node_clear(&self->reusable_node); + LOG("new_parse"); + } } self->operation_count = 0; @@ -1922,7 +2051,7 @@ TSTree *ts_parser_parse( bool allow_node_reuse = version_count == 1; while (ts_stack_is_active(self->stack, version)) { LOG( - "process version:%d, version_count:%u, state:%d, row:%u, col:%u", + "process version:%u, version_count:%u, state:%d, row:%u, col:%u", version, ts_stack_version_count(self->stack), ts_stack_state(self->stack, version), @@ -1930,7 +2059,11 @@ TSTree *ts_parser_parse( ts_stack_position(self->stack, version).extent.column ); - if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; + if (!ts_parser__advance(self, version, allow_node_reuse)) { + if (self->has_scanner_error) goto exit; + return NULL; + } + LOG_STACK(); position = ts_stack_position(self->stack, version).bytes; @@ -1969,13 +2102,15 @@ TSTree *ts_parser_parse( LOG("done"); LOG_TREE(self->finished_tree); - TSTree *result = ts_tree_new( + result = ts_tree_new( self->finished_tree, self->language, self->lexer.included_ranges, self->lexer.included_range_count ); self->finished_tree = NULL_SUBTREE; + +exit: ts_parser_reset(self); return result; } @@ -2004,4 +2139,15 @@ TSTree *ts_parser_parse_string_encoding( }); } +void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) { + ts_wasm_store_delete(self->wasm_store); + self->wasm_store = store; +} + +TSWasmStore *ts_parser_take_wasm_store(TSParser *self) { + TSWasmStore *result = self->wasm_store; + self->wasm_store = NULL; + return result; +} + #undef LOG diff --git a/lib/include/tree_sitter/parser.h b/lib/src/parser.h similarity index 69% rename from lib/include/tree_sitter/parser.h rename to lib/src/parser.h index 2b14ac1..17f0e94 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/src/parser.h @@ -13,9 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 
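The 1024-byte serialization buffer is a hard limit on external scanner state; `ts_parser__external_scanner_serialize` above asserts that native scanners respect it. A bounded `serialize` might look like this sketch, where the `ScannerState` layout and the `mylang` name are hypothetical:

#include <string.h>
#include "tree_sitter/parser.h"

typedef struct {
  uint32_t open_braces;
  char string_delimiter;
} ScannerState;

unsigned tree_sitter_mylang_external_scanner_serialize(void *payload, char *buffer) {
  ScannerState *state = payload;
  _Static_assert(
    sizeof(ScannerState) <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE,
    "scanner state must fit in the serialization buffer"
  );
  memcpy(buffer, state, sizeof(ScannerState));
  return sizeof(ScannerState);
}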
-typedef uint16_t TSStateId; - #ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; @@ -87,6 +86,11 @@ typedef union { } entry; } TSParseActionEntry; +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + struct TSLanguage { uint32_t version; uint32_t symbol_count; @@ -126,13 +130,38 @@ struct TSLanguage { const TSStateId *primary_state_ids; }; +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + /* * Lexer Macros */ +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + #define START_LEXER() \ bool result = false; \ bool skip = false; \ + UNUSED \ bool eof = false; \ int32_t lookahead; \ goto start; \ @@ -148,6 +177,17 @@ struct TSLanguage { goto next_state; \ } +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + #define SKIP(state_value) \ { \ skip = true; \ @@ -166,7 +206,7 @@ struct TSLanguage { * Parse Table Macros */ -#define SMALL_STATE(id) id - LARGE_STATE_COUNT +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) #define STATE(id) id @@ -176,7 +216,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = state_value \ + .state = (state_value) \ } \ }} @@ -184,7 +224,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = state_value, \ + .state = (state_value), \ .repetition = true \ } \ }} @@ -197,14 +237,15 @@ struct TSLanguage { } \ }} -#define REDUCE(symbol_val, child_count_val, ...) \ - {{ \ - .reduce = { \ - .type = TSParseActionTypeReduce, \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ }} #define RECOVER() \ diff --git a/lib/src/query.c b/lib/src/query.c index cfe1143..eb10bbc 100644 --- a/lib/src/query.c +++ b/lib/src/query.c @@ -42,7 +42,7 @@ typedef struct { * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. * - `negated_field_list_id` - An id representing a set of fields that must - * that must not be present on a node matching this step. + * not be present on a node matching this step. 
* * Steps have some additional fields in order to handle the `.` (or "anchor") operator, * which forbids additional child nodes: @@ -305,6 +305,7 @@ struct TSQueryCursor { Array(QueryState) finished_states; CaptureListPool capture_list_pool; uint32_t depth; + uint32_t max_start_depth; uint32_t start_byte; uint32_t end_byte; TSPoint start_point; @@ -331,7 +332,7 @@ static bool stream_advance(Stream *self) { if (self->input < self->end) { uint32_t size = ts_decode_utf8( (const uint8_t *)self->input, - self->end - self->input, + (uint32_t)(self->end - self->input), &self->next ); if (size > 0) { @@ -398,7 +399,7 @@ static void stream_scan_identifier(Stream *stream) { } static uint32_t stream_offset(Stream *self) { - return self->input - self->start; + return (uint32_t)(self->input - self->start); } /****************** @@ -415,7 +416,7 @@ static CaptureListPool capture_list_pool_new(void) { } static void capture_list_pool_reset(CaptureListPool *self) { - for (uint16_t i = 0; i < self->list.size; i++) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { // This invalid size means that the list is not in use. self->list.contents[i].size = UINT32_MAX; } @@ -423,7 +424,7 @@ static void capture_list_pool_reset(CaptureListPool *self) { } static void capture_list_pool_delete(CaptureListPool *self) { - for (uint16_t i = 0; i < self->list.size; i++) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { array_delete(&self->list.contents[i]); } array_delete(&self->list); @@ -448,7 +449,7 @@ static bool capture_list_pool_is_empty(const CaptureListPool *self) { static uint16_t capture_list_pool_acquire(CaptureListPool *self) { // First see if any already allocated capture list is currently unused. if (self->free_capture_list_count > 0) { - for (uint16_t i = 0; i < self->list.size; i++) { + for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) { if (self->list.contents[i].size == UINT32_MAX) { array_clear(&self->list.contents[i]); self->free_capture_list_count--; @@ -695,7 +696,7 @@ static void capture_quantifiers_add_all( if (self->size < quantifiers->size) { array_grow_by(self, quantifiers->size - self->size); } - for (uint16_t id = 0; id < quantifiers->size; id++) { + for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) { uint8_t *quantifier = array_get(quantifiers, id); uint8_t *own_quantifier = array_get(self, id); *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier); @@ -707,7 +708,7 @@ static void capture_quantifiers_mul( CaptureQuantifiers *self, TSQuantifier quantifier ) { - for (uint16_t id = 0; id < self->size; id++) { + for (uint16_t id = 0; id < (uint16_t)self->size; id++) { uint8_t *own_quantifier = array_get(self, id); *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier); } @@ -800,11 +801,10 @@ static QueryStep query_step__new( uint16_t depth, bool is_immediate ) { - return (QueryStep) { + QueryStep step = { .symbol = symbol, .depth = depth, .field = 0, - .capture_ids = {NONE, NONE, NONE}, .alternative_index = NONE, .negated_field_list_id = 0, .contains_captures = false, @@ -816,6 +816,10 @@ static QueryStep query_step__new( .is_immediate = is_immediate, .alternative_is_immediate = false, }; + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + step.capture_ids[i] = NONE; + } + return step; } static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { @@ -938,6 +942,9 @@ static inline int analysis_state__compare( } static inline AnalysisStateEntry 
*analysis_state__top(AnalysisState *self) { + if (self->depth == 0) { + return &self->stack[0]; + } return &self->stack[self->depth - 1]; } @@ -1023,7 +1030,7 @@ static inline void analysis_state_set__delete(AnalysisStateSet *self) { * QueryAnalyzer ****************/ -static inline QueryAnalysis query_analysis__new() { +static inline QueryAnalysis query_analysis__new(void) { return (QueryAnalysis) { .states = array_new(), .next_states = array_new(), @@ -1258,7 +1265,7 @@ static void ts_query__perform_analysis( // Follow every possible path in the parse table, but only visit states that // are part of the subgraph for the current symbol. LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state); - while (ts_lookahead_iterator_next(&lookahead_iterator)) { + while (ts_lookahead_iterator__next(&lookahead_iterator)) { TSSymbol sym = lookahead_iterator.symbol; AnalysisSubgraphNode successor = { @@ -1512,7 +1519,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { AnalysisSubgraph subgraph = { .symbol = parent_symbol }; array_insert_sorted_by(&subgraphs, .symbol, subgraph); } - for (TSSymbol sym = self->language->token_count; sym < self->language->symbol_count; sym++) { + for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) { if (!ts_language_symbol_metadata(self->language, sym).visible) { AnalysisSubgraph subgraph = { .symbol = sym }; array_insert_sorted_by(&subgraphs, .symbol, subgraph); @@ -1526,10 +1533,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // with information about the node that would be created. // 3) A list of predecessor states for each state. StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language); - for (TSStateId state = 1; state < self->language->state_count; state++) { + for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) { unsigned subgraph_index, exists; LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state); - while (ts_lookahead_iterator_next(&lookahead_iterator)) { + while (ts_lookahead_iterator__next(&lookahead_iterator)) { if (lookahead_iterator.action_count) { for (unsigned i = 0; i < lookahead_iterator.action_count; i++) { const TSParseAction *action = &lookahead_iterator.actions[i]; @@ -1680,10 +1687,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists); if (!exists) { unsigned first_child_step_index = parent_step_index + 1; - uint32_t i, exists; - array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists); - assert(exists); - *error_offset = self->step_offsets.contents[i].byte_offset; + uint32_t j, child_exists; + array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists); + assert(child_exists); + *error_offset = self->step_offsets.contents[j].byte_offset; all_patterns_are_valid = false; break; } @@ -1743,10 +1750,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (analysis.finished_parent_symbols.size == 0) { assert(analysis.final_step_indices.size > 0); uint16_t impossible_step_index = *array_back(&analysis.final_step_indices); - uint32_t i, exists; - array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); - if (i >= self->step_offsets.size) i = 
self->step_offsets.size - 1; - *error_offset = self->step_offsets.contents[i].byte_offset; + uint32_t j, impossible_exists; + array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists); + if (j >= self->step_offsets.size) j = self->step_offsets.size - 1; + *error_offset = self->step_offsets.contents[j].byte_offset; all_patterns_are_valid = false; break; } @@ -1781,7 +1788,8 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { ) { TSQueryPredicateStep *step = &self->predicate_steps.contents[j]; if (step->type == TSQueryPredicateStepTypeCapture) { - array_insert_sorted_by(&predicate_capture_ids, , step->value_id); + uint16_t value_id = step->value_id; + array_insert_sorted_by(&predicate_capture_ids, , value_id); } } @@ -1899,7 +1907,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #ifdef DEBUG_ANALYZE_QUERY - printf("\nWalk states for rootless pattern step %u:\n", step_index); + printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index); #endif ts_query__perform_analysis( @@ -2064,7 +2072,7 @@ static TSQueryError ts_query__parse_predicate( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *predicate_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - predicate_name; + uint32_t length = (uint32_t)(stream->input - predicate_name); uint16_t id = symbol_table_insert_name( &self->predicate_values, predicate_name, @@ -2095,13 +2103,13 @@ static TSQueryError ts_query__parse_predicate( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *capture_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - capture_name; + uint32_t capture_length = (uint32_t)(stream->input - capture_name); // Add the capture id to the first step of the pattern int capture_id = symbol_table_id_for_name( &self->captures, capture_name, - length + capture_length ); if (capture_id == -1) { stream_reset(stream, capture_name); @@ -2118,14 +2126,14 @@ static TSQueryError ts_query__parse_predicate( else if (stream->next == '"') { TSQueryError e = ts_query__parse_string_literal(self, stream); if (e) return e; - uint16_t id = symbol_table_insert_name( + uint16_t query_id = symbol_table_insert_name( &self->predicate_values, self->string_buffer.contents, self->string_buffer.size ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, - .value_id = id, + .value_id = query_id, })); } @@ -2133,15 +2141,15 @@ static TSQueryError ts_query__parse_predicate( else if (stream_is_ident_start(stream)) { const char *symbol_start = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - symbol_start; - uint16_t id = symbol_table_insert_name( + uint32_t symbol_length = (uint32_t)(stream->input - symbol_start); + uint16_t query_id = symbol_table_insert_name( &self->predicate_values, symbol_start, - length + symbol_length ); array_push(&self->predicate_steps, ((TSQueryPredicateStep) { .type = TSQueryPredicateStepTypeString, - .value_id = id, + .value_id = query_id, })); } @@ -2253,7 +2261,7 @@ static TSQueryError ts_query__parse_pattern( // If this parenthesis is followed by a node, then it represents a grouped sequence. 
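All of this pattern parsing is driven through `ts_query_new`, which reports failures via a byte offset and a `TSQueryError` code. A minimal sketch of compiling a query; the node and field names in the pattern are hypothetical and must exist in whatever grammar `language` comes from:

#include <string.h>
#include "tree_sitter/api.h"

static TSQuery *compile_example_query(const TSLanguage *language) {
  const char *source = "((call name: (identifier) @fn) (#eq? @fn \"main\"))";
  uint32_t error_offset;
  TSQueryError error_type;
  TSQuery *query = ts_query_new(
    language, source, (uint32_t)strlen(source), &error_offset, &error_type
  );
  if (!query) {
    // On failure, error_offset points at the offending byte in `source`.
  }
  return query;
}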
if (stream->next == '(' || stream->next == '"' || stream->next == '[') { - bool child_is_immediate = false; + bool child_is_immediate = is_immediate; CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new(); for (;;) { if (stream->next == '.') { @@ -2302,17 +2310,10 @@ static TSQueryError ts_query__parse_pattern( if (stream_is_ident_start(stream)) { const char *node_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - node_name; - - // TODO - remove. - // For temporary backward compatibility, handle predicates without the leading '#' sign. - if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) { - stream_reset(stream, node_name); - return ts_query__parse_predicate(self, stream); - } + uint32_t length = (uint32_t)(stream->input - node_name); // Parse the wildcard symbol - else if (length == 1 && node_name[0] == '_') { + if (length == 1 && node_name[0] == '_') { symbol = WILDCARD_SYMBOL; } @@ -2353,7 +2354,7 @@ static TSQueryError ts_query__parse_pattern( const char *node_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - node_name; + uint32_t length = (uint32_t)(stream->input - node_name); step->symbol = ts_language_symbol_for_name( self->language, @@ -2386,7 +2387,7 @@ static TSQueryError ts_query__parse_pattern( } const char *field_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - field_name; + uint32_t length = (uint32_t)(stream->input - field_name); stream_skip_whitespace(stream); TSFieldId field_id = ts_language_field_id_for_name( @@ -2497,7 +2498,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the field name const char *field_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - field_name; + uint32_t length = (uint32_t)(stream->input - field_name); stream_skip_whitespace(stream); if (stream->next != ':') { @@ -2589,8 +2590,11 @@ static TSQueryError ts_query__parse_pattern( repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); + // Stop when `step->alternative_index` is `NONE` or it points to + // `repeat_step` or beyond. Note that having just been pushed, + // `repeat_step` occupies slot `self->steps.size - 1`. 
QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE) { + while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) { step = &self->steps.contents[step->alternative_index]; } step->alternative_index = self->steps.size; @@ -2604,7 +2608,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); QueryStep *step = &self->steps.contents[starting_step_index]; - while (step->alternative_index != NONE) { + while (step->alternative_index != NONE && step->alternative_index < self->steps.size) { step = &self->steps.contents[step->alternative_index]; } step->alternative_index = self->steps.size; @@ -2616,7 +2620,7 @@ static TSQueryError ts_query__parse_pattern( if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; const char *capture_name = stream->input; stream_scan_identifier(stream); - uint32_t length = stream->input - capture_name; + uint32_t length = (uint32_t)(stream->input - capture_name); stream_skip_whitespace(stream); // Add the capture id to the first step of the pattern @@ -2639,7 +2643,6 @@ static TSQueryError ts_query__parse_pattern( step->alternative_index < self->steps.size ) { step_index = step->alternative_index; - step = &self->steps.contents[step_index]; } else { break; } @@ -2687,7 +2690,7 @@ TSQuery *ts_query_new( .negated_fields = array_new(), .repeat_symbols_with_rootless_patterns = array_new(), .wildcard_root_pattern_count = 0, - .language = language, + .language = ts_language_copy(language), }; array_push(&self->negated_fields, 0); @@ -2751,9 +2754,9 @@ TSQuery *ts_query_new( uint32_t start_depth = step->depth; bool is_rooted = start_depth == 0; for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { - QueryStep *step = &self->steps.contents[step_index]; - if (step->is_dead_end) break; - if (step->depth == start_depth) { + QueryStep *child_step = &self->steps.contents[step_index]; + if (child_step->is_dead_end) break; + if (child_step->depth == start_depth) { is_rooted = false; break; } @@ -2772,7 +2775,6 @@ TSQuery *ts_query_new( // then add multiple entries to the pattern map. if (step->alternative_index != NONE) { start_step_index = step->alternative_index; - step->alternative_index = NONE; } else if (wildcard_root_alternative_index != NONE) { start_step_index = wildcard_root_alternative_index; wildcard_root_alternative_index = NONE; @@ -2802,6 +2804,7 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->string_buffer); array_delete(&self->negated_fields); array_delete(&self->repeat_symbols_with_rootless_patterns); + ts_language_delete(self->language); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) { @@ -2973,6 +2976,7 @@ TSQueryCursor *ts_query_cursor_new(void) { .end_byte = UINT32_MAX, .start_point = {0, 0}, .end_point = POINT_MAX, + .max_start_depth = UINT32_MAX, }; array_reserve(&self->states, 8); array_reserve(&self->finished_states, 8); @@ -2999,11 +3003,43 @@ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { self->capture_list_pool.max_capture_list_count = limit; } +#ifdef DEBUG_EXECUTE_QUERY +#define LOG(...) fprintf(stderr, __VA_ARGS__) +#else +#define LOG(...) 
+#endif
+
 void ts_query_cursor_exec(
 TSQueryCursor *self,
 const TSQuery *query,
 TSNode node
 ) {
+ if (query) {
+ LOG("query steps:\n");
+ for (unsigned i = 0; i < query->steps.size; i++) {
+ QueryStep *step = &query->steps.contents[i];
+ LOG(" %u: {", i);
+ if (step->depth == PATTERN_DONE_MARKER) {
+ LOG("DONE");
+ } else if (step->is_dead_end) {
+ LOG("dead_end");
+ } else if (step->is_pass_through) {
+ LOG("pass_through");
+ } else if (step->symbol != WILDCARD_SYMBOL) {
+ LOG("symbol: %s", query->language->symbol_names[step->symbol]);
+ } else {
+ LOG("symbol: *");
+ }
+ if (step->field) {
+ LOG(", field: %s", query->language->field_names[step->field]);
+ }
+ if (step->alternative_index != NONE) {
+ LOG(", alternative: %u", step->alternative_index);
+ }
+ LOG("},\n");
+ }
+ }
+
 array_clear(&self->states);
 array_clear(&self->finished_states);
 ts_tree_cursor_reset(&self->cursor, node);
@@ -3171,12 +3207,6 @@ void ts_query_cursor__compare_captures(
 }
 }
-#ifdef DEBUG_EXECUTE_QUERY
-#define LOG(...) fprintf(stderr, __VA_ARGS__)
-#else
-#define LOG(...)
-#endif
-
 static void ts_query_cursor__add_state(
 TSQueryCursor *self,
 const PatternEntry *pattern
@@ -3323,7 +3353,7 @@ static QueryState *ts_query_cursor__copy_state(
 QueryState **state_ref
 ) {
 const QueryState *state = *state_ref;
- uint32_t state_index = state - self->states.contents;
+ uint32_t state_index = (uint32_t)(state - self->states.contents);
 QueryState copy = *state;
 copy.capture_list_id = NONE;
@@ -3343,9 +3373,15 @@ static QueryState *ts_query_cursor__copy_state(
 return &self->states.contents[state_index + 1];
 }
-static inline bool ts_query_cursor__should_descend_outside_of_range(
- TSQueryCursor *self
+static inline bool ts_query_cursor__should_descend(
+ TSQueryCursor *self,
+ bool node_intersects_range
 ) {
+
+ if (node_intersects_range && self->depth < self->max_start_depth) {
+ return true;
+ }
+
 // If there are in-progress matches whose remaining steps occur
 // deeper in the tree, then descend.
 for (unsigned i = 0; i < self->states.size; i++) {
@@ -3359,6 +3395,10 @@ static inline bool ts_query_cursor__should_descend_outside_of_range(
 }
 }
+ if (self->depth >= self->max_start_depth) {
+ return false;
+ }
+
 // If the current node is hidden, then a non-rooted pattern might match
 // one of its roots inside of this node, and match another of its roots
 // as part of a sibling node, so we may need to descend.
@@ -3417,34 +3457,7 @@ static inline bool ts_query_cursor__advance(
 self->depth,
 ts_node_type(ts_tree_cursor_current_node(&self->cursor))
 );
- }
-
- // Leave this node by stepping to its next sibling or to its parent.
- switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
- case TreeCursorStepVisible:
- if (!self->on_visible_node) {
- self->depth++;
- self->on_visible_node = true;
- }
- self->ascending = false;
- break;
- case TreeCursorStepHidden:
- if (self->on_visible_node) {
- self->depth--;
- self->on_visible_node = false;
- }
- self->ascending = false;
- break;
- default:
- if (ts_tree_cursor_goto_parent(&self->cursor)) {
- self->depth--;
- } else {
- LOG("halt at root\n");
- self->halted = true;
- }
- }
-
 if (self->on_visible_node) {
 // After leaving a node, remove any states that cannot make further progress.
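The loop that follows uses a shift-compaction idiom rather than erasing states one at a time. A condensed sketch, with an illustrative element type and predicate standing in for the real `QueryState` logic:

#include <stdbool.h>

// Walk the array once: count retired entries, shift each survivor left over
// the gap, then shrink the logical size in a single step at the end.
static void compact(int *items, unsigned *size, bool (*retire)(int)) {
  unsigned deleted_count = 0;
  for (unsigned i = 0, n = *size; i < n; i++) {
    if (retire(items[i])) {
      deleted_count++;
    } else if (deleted_count > 0) {
      items[i - deleted_count] = items[i];
    }
  }
  *size -= deleted_count;
}

Note that the shift has to live in an else branch: a retired element must not also be copied over a survivor, which is what the restructuring below enforces.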
uint32_t deleted_count = 0; for (unsigned i = 0, n = self->states.size; i < n; i++) { @@ -3453,19 +3466,22 @@ static inline bool ts_query_cursor__advance( // If a state completed its pattern inside of this node, but was deferred from finishing // in order to search for longer matches, mark it as finished. - if (step->depth == PATTERN_DONE_MARKER) { - if (state->start_depth > self->depth || self->halted) { - LOG(" finish pattern %u\n", state->pattern_index); - array_push(&self->finished_states, *state); - did_match = true; - deleted_count++; - continue; - } + if ( + step->depth == PATTERN_DONE_MARKER && + (state->start_depth > self->depth || self->depth == 0) + ) { + LOG(" finish pattern %u\n", state->pattern_index); + array_push(&self->finished_states, *state); + did_match = true; + deleted_count++; } // If a state needed to match something within this node, then remove that state // as it has failed to match. - else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) { + else if ( + step->depth != PATTERN_DONE_MARKER && + (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth + ) { LOG( " failed to match. pattern:%u, step:%u\n", state->pattern_index, @@ -3476,15 +3492,39 @@ static inline bool ts_query_cursor__advance( state->capture_list_id ); deleted_count++; - continue; } - if (deleted_count > 0) { + else if (deleted_count > 0) { self->states.contents[i - deleted_count] = *state; } } self->states.size -= deleted_count; } + + // Leave this node by stepping to its next sibling or to its parent. + switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) { + case TreeCursorStepVisible: + if (!self->on_visible_node) { + self->depth++; + self->on_visible_node = true; + } + self->ascending = false; + break; + case TreeCursorStepHidden: + if (self->on_visible_node) { + self->depth--; + self->on_visible_node = false; + } + self->ascending = false; + break; + default: + if (ts_tree_cursor_goto_parent(&self->cursor)) { + self->depth--; + } else { + LOG("halt at root\n"); + self->halted = true; + } + } } // Enter a new node. @@ -3552,12 +3592,14 @@ static inline bool ts_query_cursor__advance( // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; if ( (pattern->is_rooted ? node_intersects_range : (parent_intersects_range && !parent_is_error)) && (!step->field || field_id == step->field) && - (!step->supertype_symbol || supertype_count > 0) + (!step->supertype_symbol || supertype_count > 0) && + (start_depth <= self->max_start_depth) ) { ts_query_cursor__add_state(self, pattern); } @@ -3570,6 +3612,7 @@ static inline bool ts_query_cursor__advance( PatternEntry *pattern = &self->query->pattern_map.contents[i]; QueryStep *step = &self->query->steps.contents[pattern->step_index]; + uint32_t start_depth = self->depth - step->depth; do { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. @@ -3577,7 +3620,8 @@ static inline bool ts_query_cursor__advance( (pattern->is_rooted ? 
node_intersects_range :
 (parent_intersects_range && !parent_is_error)) &&
- (!step->field || field_id == step->field)
+ (!step->field || field_id == step->field) &&
+ (start_depth <= self->max_start_depth)
 ) {
 ts_query_cursor__add_state(self, pattern);
 }
@@ -3591,8 +3635,8 @@
 }
 // Update all of the in-progress states with current node.
- for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
- QueryState *state = &self->states.contents[i];
+ for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) {
+ QueryState *state = &self->states.contents[j];
 QueryStep *step = &self->query->steps.contents[state->step_index];
 state->has_in_progress_alternatives = false;
 copy_count = 0;
@@ -3619,8 +3663,8 @@
 }
 if (step->supertype_symbol) {
 bool has_supertype = false;
- for (unsigned j = 0; j < supertype_count; j++) {
- if (supertypes[j] == step->supertype_symbol) {
+ for (unsigned k = 0; k < supertype_count; k++) {
+ if (supertypes[k] == step->supertype_symbol) {
 has_supertype = true;
 break;
 }
@@ -3665,8 +3709,8 @@
 &self->capture_list_pool,
 state->capture_list_id
 );
- array_erase(&self->states, i);
- i--;
+ array_erase(&self->states, j);
+ j--;
 }
 continue;
 }
@@ -3726,8 +3770,8 @@
 }
 if (state->dead) {
- array_erase(&self->states, i);
- i--;
+ array_erase(&self->states, j);
+ j--;
 continue;
 }
@@ -3746,29 +3790,29 @@
 // If this state's next step has an alternative step, then copy the state in order
 // to pursue both alternatives. The alternative step itself may have an alternative,
 // so this is an iterative process.
- unsigned end_index = i + 1;
- for (unsigned j = i; j < end_index; j++) {
- QueryState *state = &self->states.contents[j];
- QueryStep *next_step = &self->query->steps.contents[state->step_index];
- if (next_step->alternative_index != NONE) {
+ unsigned end_index = j + 1;
+ for (unsigned k = j; k < end_index; k++) {
+ QueryState *child_state = &self->states.contents[k];
+ QueryStep *child_step = &self->query->steps.contents[child_state->step_index];
+ if (child_step->alternative_index != NONE) {
 // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
 // via its alternative index. When a state reaches a dead-end step, it jumps straight
 // to the step's alternative.
- if (next_step->is_dead_end) {
- state->step_index = next_step->alternative_index;
- j--;
+ if (child_step->is_dead_end) {
+ child_state->step_index = child_step->alternative_index;
+ k--;
 continue;
 }
 // A "pass-through" step exists only to add a branch into the step sequence,
 // via its alternative_index. When a state reaches a pass-through step, it splits
 // in order to process the alternative step, and then it advances to the next step.
- if (next_step->is_pass_through) {
- state->step_index++;
- j--;
+ if (child_step->is_pass_through) {
+ child_state->step_index++;
+ k--;
 }
- QueryState *copy = ts_query_cursor__copy_state(self, &state);
+ QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
 if (copy) {
 LOG(
 " split state for branch.
pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n", @@ -3780,8 +3824,8 @@ static inline bool ts_query_cursor__advance( ); end_index++; copy_count++; - copy->step_index = next_step->alternative_index; - if (next_step->alternative_is_immediate) { + copy->step_index = child_step->alternative_index; + if (child_step->alternative_is_immediate) { copy->seeking_immediate_match = true; } } @@ -3789,20 +3833,20 @@ static inline bool ts_query_cursor__advance( } } - for (unsigned i = 0; i < self->states.size; i++) { - QueryState *state = &self->states.contents[i]; + for (unsigned j = 0; j < self->states.size; j++) { + QueryState *state = &self->states.contents[j]; if (state->dead) { - array_erase(&self->states, i); - i--; + array_erase(&self->states, j); + j--; continue; } - // Enfore the longest-match criteria. When a query pattern contains optional or + // Enforce the longest-match criteria. When a query pattern contains optional or // repeated nodes, this is necessary to avoid multiple redundant states, where // one state has a strict subset of another state's captures. bool did_remove = false; - for (unsigned j = i + 1; j < self->states.size; j++) { - QueryState *other_state = &self->states.contents[j]; + for (unsigned k = j + 1; k < self->states.size; k++) { + QueryState *other_state = &self->states.contents[k]; // Query states are kept in ascending order of start_depth and pattern_index. // Since the longest-match criteria is only used for deduping matches of the same @@ -3829,8 +3873,8 @@ static inline bool ts_query_cursor__advance( state->step_index ); capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id); - array_erase(&self->states, j); - j--; + array_erase(&self->states, k); + k--; continue; } other_state->has_in_progress_alternatives = true; @@ -3843,8 +3887,8 @@ static inline bool ts_query_cursor__advance( state->step_index ); capture_list_pool_release(&self->capture_list_pool, state->capture_list_id); - array_erase(&self->states, i); - i--; + array_erase(&self->states, j); + j--; did_remove = true; break; } @@ -3871,17 +3915,14 @@ static inline bool ts_query_cursor__advance( array_push(&self->finished_states, *state); array_erase(&self->states, (uint32_t)(state - self->states.contents)); did_match = true; - i--; + j--; } } } } } - bool should_descend = - node_intersects_range || - ts_query_cursor__should_descend_outside_of_range(self); - if (should_descend) { + if (ts_query_cursor__should_descend(self, node_intersects_range)) { switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) { case TreeCursorStepVisible: self->depth++; @@ -4000,9 +4041,20 @@ bool ts_query_cursor_next_capture( continue; } - // Skip captures that precede the cursor's start byte. TSNode node = captures->contents[state->consumed_capture_count].node; - if (ts_node_end_byte(node) <= self->start_byte) { + + bool node_precedes_range = ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ); + bool node_follows_range = ( + ts_node_start_byte(node) >= self->end_byte || + point_gte(ts_node_start_point(node), self->end_point) + ); + bool node_outside_of_range = node_precedes_range || node_follows_range; + + // Skip captures that are outside of the cursor's range. 
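A self-contained sketch of the range test computed above, with simplified point and signature types standing in for `TSPoint`, `point_lte`, and `point_gte`:

#include <stdbool.h>
#include <stdint.h>

typedef struct { uint32_t row, column; } Pt;

static bool pt_lte(Pt a, Pt b) {
  return a.row < b.row || (a.row == b.row && a.column <= b.column);
}

// A node lies outside of the range when it ends at or before the range
// start, or starts at or after the range end, in byte or point coordinates.
static bool outside_of_range(
  uint32_t node_start_byte, uint32_t node_end_byte, Pt node_start, Pt node_end,
  uint32_t start_byte, uint32_t end_byte, Pt start_point, Pt end_point
) {
  bool precedes = node_end_byte <= start_byte || pt_lte(node_end, start_point);
  bool follows = node_start_byte >= end_byte || pt_lte(end_point, node_start);
  return precedes || follows;
}

Checking both byte offsets and points mirrors the cursor's API, where a range may be set in either coordinate system.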
+ if (node_outside_of_range) {
 state->consumed_capture_count++;
 continue;
 }
@@ -4072,4 +4124,11 @@ bool ts_query_cursor_next_capture(
 }
 }
+void ts_query_cursor_set_max_start_depth(
+ TSQueryCursor *self,
+ uint32_t max_start_depth
+) {
+ self->max_start_depth = max_start_depth;
+}
+
 #undef LOG
diff --git a/lib/src/stack.c b/lib/src/stack.c
index 98e3a96..98d8c56 100644
--- a/lib/src/stack.c
+++ b/lib/src/stack.c
@@ -5,6 +5,7 @@
 #include "./stack.h"
 #include "./length.h"
 #include <assert.h>
+#include <inttypes.h>
 #include <stdio.h>
 #define MAX_LINK_COUNT 8
@@ -12,9 +13,9 @@
 #define MAX_ITERATOR_COUNT 64
 #if defined _WIN32 && !defined __GNUC__
-#define inline __forceinline
+#define forceinline __forceinline
 #else
-#define inline static inline __attribute__((always_inline))
+#define forceinline static inline __attribute__((always_inline))
 #endif
 typedef struct StackNode StackNode;
@@ -120,6 +121,20 @@ static void stack_node_release(
 }
 }
+/// Get the number of nodes in the subtree, for the purpose of measuring
+/// how much progress has been made by a given version of the stack.
+static uint32_t stack__subtree_node_count(Subtree subtree) {
+ uint32_t count = ts_subtree_visible_descendant_count(subtree);
+ if (ts_subtree_visible(subtree)) count++;
+
+ // Count intermediate error nodes even though they are not visible,
+ // because a stack version's node count is used to check whether it
+ // has made any progress since the last time it encountered an error.
+ if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++;
+
+ return count;
+}
+
 static StackNode *stack_node_new(
 StackNode *previous_node,
 Subtree subtree,
@@ -152,7 +167,7 @@ static StackNode *stack_node_new(
 if (subtree.ptr) {
 node->error_cost += ts_subtree_error_cost(subtree);
 node->position = length_add(node->position, ts_subtree_total_size(subtree));
- node->node_count += ts_subtree_node_count(subtree);
+ node->node_count += stack__subtree_node_count(subtree);
 node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
 }
 } else {
@@ -213,7 +228,8 @@ static void stack_node_add_link(
 // If the previous nodes are mergeable, merge them recursively.
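A condensed restatement of the merge criterion applied below, with an illustrative node type; the third condition is the behavioral change in this hunk:

#include <stdbool.h>
#include <stdint.h>

typedef struct {
  uint16_t state;           // parse state
  uint32_t position_bytes;  // absolute position of the stack node
  uint32_t error_cost;      // accumulated cost of error recovery
} Node;

// Two stack nodes may only be merged when they agree on parse state and
// position, and now also on error cost, so that stack versions carrying
// different error recoveries stay distinct.
static bool nodes_mergeable(const Node *a, const Node *b) {
  return a->state == b->state &&
         a->position_bytes == b->position_bytes &&
         a->error_cost == b->error_cost;
}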
if ( existing_link->node->state == link.node->state && - existing_link->node->position.bytes == link.node->position.bytes + existing_link->node->position.bytes == link.node->position.bytes && + existing_link->node->error_cost == link.node->error_cost ) { for (int j = 0; j < link.node->link_count; j++) { stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); @@ -239,7 +255,7 @@ static void stack_node_add_link( if (link.subtree.ptr) { ts_subtree_retain(link.subtree); - node_count += ts_subtree_node_count(link.subtree); + node_count += stack__subtree_node_count(link.subtree); dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); } @@ -305,7 +321,7 @@ static void ts_stack__add_slice( array_push(&self->slices, slice); } -inline StackSliceArray stack__iter( +static StackSliceArray stack__iter( Stack *self, StackVersion version, StackCallback callback, @@ -316,7 +332,7 @@ inline StackSliceArray stack__iter( array_clear(&self->iterators); StackHead *head = array_get(&self->heads, version); - StackIterator iterator = { + StackIterator new_iterator = { .node = head->node, .subtrees = array_new(), .subtree_count = 0, @@ -326,10 +342,10 @@ inline StackSliceArray stack__iter( bool include_subtrees = false; if (goal_subtree_count >= 0) { include_subtrees = true; - array_reserve(&iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); + array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); } - array_push(&self->iterators, iterator); + array_push(&self->iterators, new_iterator); while (self->iterators.size > 0) { for (uint32_t i = 0, size = self->iterators.size; i < size; i++) { @@ -495,7 +511,7 @@ void ts_stack_push( head->node = new_node; } -inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { +forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) { unsigned *goal_subtree_count = payload; if (iterator->subtree_count == *goal_subtree_count) { return StackActionPop | StackActionStop; @@ -505,10 +521,10 @@ inline StackAction pop_count_callback(void *payload, const StackIterator *iterat } StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) { - return stack__iter(self, version, pop_count_callback, &count, count); + return stack__iter(self, version, pop_count_callback, &count, (int)count); } -inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { +forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) { (void)payload; if (iterator->subtree_count >= 1) { if (iterator->is_pending) { @@ -530,7 +546,7 @@ StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) { return pop; } -inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { +forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) { if (iterator->subtrees.size > 0) { bool *found_error = payload; if (!*found_error && ts_subtree_is_error(iterator->subtrees.contents[0])) { @@ -561,7 +577,7 @@ SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) { return (SubtreeArray) {.size = 0}; } -inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { +forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) { (void)payload; return iterator->node->link_count == 0 ? 
StackActionPop : StackActionNone; } @@ -575,7 +591,7 @@ typedef struct { unsigned max_depth; } SummarizeStackSession; -inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { +forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { SummarizeStackSession *session = payload; TSStateId state = iterator->node->state; unsigned depth = iterator->subtree_count; @@ -852,7 +868,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) fprintf(f, "\""); fprintf( f, - "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"", + "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"", ts_subtree_error_cost(link.subtree), ts_subtree_dynamic_precedence(link.subtree) ); @@ -880,4 +896,4 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) return true; } -#undef inline +#undef forceinline diff --git a/lib/src/subtree.c b/lib/src/subtree.c index f8f8295..4524e18 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -1,10 +1,11 @@ #include #include -#include +#include #include #include #include #include "./alloc.h" +#include "./array.h" #include "./atomic.h" #include "./subtree.h" #include "./length.h" @@ -56,10 +57,10 @@ const char *ts_external_scanner_state_data(const ExternalScannerState *self) { } } -bool ts_external_scanner_state_eq(const ExternalScannerState *a, const char *buffer, unsigned length) { +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length) { return - a->length == length && - memcmp(ts_external_scanner_state_data(a), buffer, length) == 0; + self->length == length && + memcmp(ts_external_scanner_state_data(self), buffer, length) == 0; } // SubtreeArray @@ -348,7 +349,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1]; long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2); if (repeat_delta > 0) { - unsigned n = repeat_delta; + unsigned n = (unsigned)repeat_delta; for (unsigned i = n / 2; i > 0; i /= 2) { ts_subtree__compress(tree, i, language, &pool->tree_stack); n -= i; @@ -376,7 +377,7 @@ void ts_subtree_summarize_children( self.ptr->visible_child_count = 0; self.ptr->error_cost = 0; self.ptr->repeat_depth = 0; - self.ptr->node_count = 1; + self.ptr->visible_descendant_count = 0; self.ptr->has_external_tokens = false; self.ptr->depends_on_column = false; self.ptr->has_external_scanner_state_change = false; @@ -435,14 +436,16 @@ void ts_subtree_summarize_children( } self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child); - self.ptr->node_count += ts_subtree_node_count(child); + self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child); if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) { + self.ptr->visible_descendant_count++; self.ptr->visible_child_count++; if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) { self.ptr->named_child_count++; } } else if (ts_subtree_visible(child)) { + self.ptr->visible_descendant_count++; self.ptr->visible_child_count++; if (ts_subtree_named(child)) self.ptr->named_child_count++; } else if (grandchild_count > 0) { @@ -513,7 +516,7 @@ MutableSubtree ts_subtree_new_node( size_t new_byte_size = ts_subtree_alloc_size(children->size); if (children->capacity * sizeof(Subtree) < new_byte_size) { children->contents 
= ts_realloc(children->contents, new_byte_size); - children->capacity = new_byte_size / sizeof(Subtree); + children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); } SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; @@ -529,7 +532,7 @@ MutableSubtree ts_subtree_new_node( .fragile_right = fragile, .is_keyword = false, {{ - .node_count = 0, + .visible_descendant_count = 0, .production_id = production_id, .first_leaf = {.symbol = 0, .parse_state = 0}, }} @@ -616,20 +619,32 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) { } } -int ts_subtree_compare(Subtree left, Subtree right) { - if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1; - if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1; - if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1; - if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1; - for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) { - Subtree left_child = ts_subtree_children(left)[i]; - Subtree right_child = ts_subtree_children(right)[i]; - switch (ts_subtree_compare(left_child, right_child)) { - case -1: return -1; - case 1: return 1; - default: break; +int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool) { + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right)); + + while (pool->tree_stack.size > 0) { + right = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + left = ts_subtree_from_mut(array_pop(&pool->tree_stack)); + + int result = 0; + if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) result = -1; + else if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) result = 1; + else if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) result = -1; + else if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) result = 1; + if (result != 0) { + array_clear(&pool->tree_stack); + return result; + } + + for (uint32_t i = ts_subtree_child_count(left); i > 0; i--) { + Subtree left_child = ts_subtree_children(left)[i - 1]; + Subtree right_child = ts_subtree_children(right)[i - 1]; + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(left_child)); + array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(right_child)); } } + return 0; } @@ -641,24 +656,24 @@ static inline void ts_subtree_set_has_changes(MutableSubtree *self) { } } -Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) { +Subtree ts_subtree_edit(Subtree self, const TSInputEdit *input_edit, SubtreePool *pool) { typedef struct { Subtree *tree; Edit edit; - } StackEntry; + } EditEntry; - Array(StackEntry) stack = array_new(); - array_push(&stack, ((StackEntry) { + Array(EditEntry) stack = array_new(); + array_push(&stack, ((EditEntry) { .tree = &self, .edit = (Edit) { - .start = {edit->start_byte, edit->start_point}, - .old_end = {edit->old_end_byte, edit->old_end_point}, - .new_end = {edit->new_end_byte, edit->new_end_point}, + .start = {input_edit->start_byte, input_edit->start_point}, + .old_end = {input_edit->old_end_byte, input_edit->old_end_point}, + .new_end = {input_edit->new_end_byte, input_edit->new_end_point}, }, })); while (stack.size) { - StackEntry entry = array_pop(&stack); + EditEntry entry = array_pop(&stack); Edit edit = entry.edit; bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; @@ -786,7 +801,7 @@ 
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool } // Queue processing of this child's subtree. - array_push(&stack, ((StackEntry) { + array_push(&stack, ((EditEntry) { .tree = child, .edit = child_edit, })); @@ -811,24 +826,24 @@ Subtree ts_subtree_last_external_token(Subtree tree) { return tree; } -static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) { - if (c == -1) - return snprintf(s, n, "INVALID"); - else if (c == '\0') - return snprintf(s, n, "'\\0'"); - else if (c == '\n') - return snprintf(s, n, "'\\n'"); - else if (c == '\t') - return snprintf(s, n, "'\\t'"); - else if (c == '\r') - return snprintf(s, n, "'\\r'"); - else if (0 < c && c < 128 && isprint(c)) - return snprintf(s, n, "'%c'", c); +static size_t ts_subtree__write_char_to_string(char *str, size_t n, int32_t chr) { + if (chr == -1) + return snprintf(str, n, "INVALID"); + else if (chr == '\0') + return snprintf(str, n, "'\\0'"); + else if (chr == '\n') + return snprintf(str, n, "'\\n'"); + else if (chr == '\t') + return snprintf(str, n, "'\\t'"); + else if (chr == '\r') + return snprintf(str, n, "'\\r'"); + else if (0 < chr && chr < 128 && isprint(chr)) + return snprintf(str, n, "'%c'", chr); else - return snprintf(s, n, "%d", c); + return snprintf(str, n, "%d", chr); } -static const char *ROOT_FIELD = "__ROOT__"; +static const char *const ROOT_FIELD = "__ROOT__"; static size_t ts_subtree__write_to_string( Subtree self, char *string, size_t limit, @@ -875,9 +890,15 @@ static size_t ts_subtree__write_to_string( } } } else if (is_root) { - TSSymbol symbol = ts_subtree_symbol(self); + TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); const char *symbol_name = ts_language_symbol_name(language, symbol); - cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); + if (ts_subtree_child_count(self) > 0) { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } else if (ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "(%s)", symbol_name); + } else { + cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name); + } } if (ts_subtree_child_count(self)) { @@ -900,17 +921,17 @@ static size_t ts_subtree__write_to_string( 0, false, NULL ); } else { - TSSymbol alias_symbol = alias_sequence + TSSymbol subtree_alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; - bool alias_is_named = alias_symbol - ? ts_language_symbol_metadata(language, alias_symbol).named + bool subtree_alias_is_named = subtree_alias_symbol + ? ts_language_symbol_metadata(language, subtree_alias_symbol).named : false; const char *child_field_name = is_visible ? 
NULL : field_name; - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (!i->inherited && i->child_index == structural_child_index) { - child_field_name = language->field_names[i->field_id]; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == structural_child_index) { + child_field_name = language->field_names[map->field_id]; break; } } @@ -918,7 +939,7 @@ static size_t ts_subtree__write_to_string( cursor += ts_subtree__write_to_string( child, *writer, limit, language, include_all, - alias_symbol, alias_is_named, child_field_name + subtree_alias_symbol, subtree_alias_is_named, child_field_name ); structural_child_index++; } @@ -932,6 +953,8 @@ static size_t ts_subtree__write_to_string( char *ts_subtree_string( Subtree self, + TSSymbol alias_symbol, + bool alias_is_named, const TSLanguage *language, bool include_all ) { @@ -939,13 +962,13 @@ char *ts_subtree_string( size_t size = ts_subtree__write_to_string( self, scratch_string, 1, language, include_all, - 0, false, ROOT_FIELD + alias_symbol, alias_is_named, ROOT_FIELD ) + 1; char *result = ts_malloc(size * sizeof(char)); ts_subtree__write_to_string( self, result, size, language, include_all, - 0, false, ROOT_FIELD + alias_symbol, alias_is_named, ROOT_FIELD ); return result; } @@ -969,6 +992,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, "error-cost: %u\n" "has-changes: %u\n" "depends-on-column: %u\n" + "descendant-count: %u\n" "repeat-depth: %u\n" "lookahead-bytes: %u", start_offset, end_offset, @@ -976,11 +1000,12 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), ts_subtree_depends_on_column(*self), + ts_subtree_visible_descendant_count(*self), ts_subtree_repeat_depth(*self), ts_subtree_lookahead_bytes(*self) ); - if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) { + if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); } @@ -992,12 +1017,12 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &ts_subtree_children(*self)[i]; - TSSymbol alias_symbol = 0; + TSSymbol subtree_alias_symbol = 0; if (!ts_subtree_extra(*child) && child_info_offset) { - alias_symbol = language->alias_sequences[child_info_offset]; + subtree_alias_symbol = language->alias_sequences[child_info_offset]; child_info_offset++; } - ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); + ts_subtree__print_dot_graph(child, child_start_offset, language, subtree_alias_symbol, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); child_start_offset += ts_subtree_total_bytes(*child); } @@ -1024,12 +1049,12 @@ const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) { } } -bool ts_subtree_external_scanner_state_eq(Subtree a, Subtree b) { - const ExternalScannerState *state_a = ts_subtree_external_scanner_state(a); - const ExternalScannerState *state_b = ts_subtree_external_scanner_state(b); +bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) { + const ExternalScannerState *state_self = ts_subtree_external_scanner_state(self); + const ExternalScannerState *state_other = 
ts_subtree_external_scanner_state(other); return ts_external_scanner_state_eq( - state_a, - ts_external_scanner_state_data(state_b), - state_b->length + state_self, + ts_external_scanner_state_data(state_other), + state_other->length ); } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index a0e838e..f140ecd 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -13,7 +13,7 @@ extern "C" { #include "./error_costs.h" #include "./host.h" #include "tree_sitter/api.h" -#include "tree_sitter/parser.h" +#include "./parser.h" #define TS_TREE_STATE_NONE USHRT_MAX #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) @@ -135,7 +135,7 @@ typedef struct { struct { uint32_t visible_child_count; uint32_t named_child_count; - uint32_t node_count; + uint32_t visible_descendant_count; int32_t dynamic_precedence; uint16_t repeat_depth; uint16_t production_id; @@ -175,7 +175,7 @@ typedef struct { void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned); const char *ts_external_scanner_state_data(const ExternalScannerState *); -bool ts_external_scanner_state_eq(const ExternalScannerState *a, const char *, unsigned); +bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned); void ts_external_scanner_state_delete(ExternalScannerState *self); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); @@ -200,19 +200,19 @@ Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, c MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); void ts_subtree_retain(Subtree); void ts_subtree_release(SubtreePool *, Subtree); -int ts_subtree_compare(Subtree, Subtree); +int ts_subtree_compare(Subtree, Subtree, SubtreePool *); void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *); void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *); void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *); void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *); Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *); -char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all); +char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all); void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *); Subtree ts_subtree_last_external_token(Subtree); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); -#define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name) +#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name) static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } @@ -297,8 +297,10 @@ static inline uint32_t ts_subtree_is_repetition(Subtree self) { : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; } -static inline uint32_t ts_subtree_node_count(Subtree self) { - return (self.data.is_inline || self.ptr->child_count == 0) ? 1 : self.ptr->node_count; +static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { + return (self.data.is_inline || self.ptr->child_count == 0) + ? 
0
+ : self.ptr->visible_descendant_count;
 }
 static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
diff --git a/lib/src/tree.c b/lib/src/tree.c
index 79e1d1a..1493673 100644
--- a/lib/src/tree.c
+++ b/lib/src/tree.c
@@ -1,3 +1,5 @@
+#define _POSIX_C_SOURCE 200112L
+
 #include "tree_sitter/api.h"
 #include "./array.h"
 #include "./get_changed_ranges.h"
@@ -12,7 +14,7 @@ TSTree *ts_tree_new(
 ) {
 TSTree *result = ts_malloc(sizeof(TSTree));
 result->root = root;
- result->language = language;
+ result->language = ts_language_copy(language);
 result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
 memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
 result->included_range_count = included_range_count;
@@ -30,6 +32,7 @@ void ts_tree_delete(TSTree *self) {
 SubtreePool pool = ts_subtree_pool_new(0);
 ts_subtree_release(&pool, self->root);
 ts_subtree_pool_delete(&pool);
+ ts_language_delete(self->language);
 ts_free(self->included_ranges);
 ts_free(self);
 }
@@ -98,23 +101,23 @@ TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
 return ranges;
 }
-TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
- TreeCursor cursor1 = {NULL, array_new()};
- TreeCursor cursor2 = {NULL, array_new()};
- ts_tree_cursor_init(&cursor1, ts_tree_root_node(self));
- ts_tree_cursor_init(&cursor2, ts_tree_root_node(other));
+TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
+ TreeCursor cursor1 = {NULL, array_new(), 0};
+ TreeCursor cursor2 = {NULL, array_new(), 0};
+ ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree));
+ ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree));
 TSRangeArray included_range_differences = array_new();
 ts_range_array_get_changed_ranges(
- self->included_ranges, self->included_range_count,
- other->included_ranges, other->included_range_count,
+ old_tree->included_ranges, old_tree->included_range_count,
+ new_tree->included_ranges, new_tree->included_range_count,
 &included_range_differences
 );
 TSRange *result;
- *count = ts_subtree_get_changed_ranges(
- &self->root, &other->root, &cursor1, &cursor2,
- self->language, &included_range_differences, &result
+ *length = ts_subtree_get_changed_ranges(
+ &old_tree->root, &new_tree->root, &cursor1, &cursor2,
+ old_tree->language, &included_range_differences, &result
 );
 array_delete(&included_range_differences);
@@ -125,17 +128,36 @@
 #ifdef _WIN32
+#include <io.h>
+#include <windows.h>
+
+int _ts_dup(HANDLE handle) {
+ HANDLE dup_handle;
+ if (!DuplicateHandle(
+ GetCurrentProcess(), handle,
+ GetCurrentProcess(), &dup_handle,
+ 0, FALSE, DUPLICATE_SAME_ACCESS
+ )) return -1;
+
+ return _open_osfhandle((intptr_t)dup_handle, 0);
+}
+
 void ts_tree_print_dot_graph(const TSTree *self, int fd) {
- (void)self;
- (void)fd;
+ FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a");
+ ts_subtree_print_dot_graph(self->root, self->language, file);
+ fclose(file);
 }
 #else
 #include <unistd.h>
-void ts_tree_print_dot_graph(const TSTree *self, int fd) {
- FILE *file = fdopen(dup(fd), "a");
+int _ts_dup(int file_descriptor) {
+ return dup(file_descriptor);
+}
+
+void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
+ FILE *file = fdopen(_ts_dup(file_descriptor), "a");
 ts_subtree_print_dot_graph(self->root, self->language, file);
 fclose(file);
 }
diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c
index 9893025..ddd7d66 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -10,26 +10,50 @@ typedef struct { Length position; uint32_t child_index; uint32_t structural_child_index; + uint32_t descendant_index; const TSSymbol *alias_sequence; } CursorChildIterator; // CursorChildIterator +static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) { + TreeCursorEntry *entry = &self->stack.contents[index]; + if (index == 0 || ts_subtree_visible(*entry->subtree)) { + return true; + } else if (!ts_subtree_extra(*entry->subtree)) { + TreeCursorEntry *parent_entry = &self->stack.contents[index - 1]; + return ts_language_alias_at( + self->tree->language, + parent_entry->subtree->ptr->production_id, + entry->structural_child_index + ); + } else { + return false; + } +} + static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; } const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, last_entry->subtree->ptr->production_id ); + + uint32_t descendant_index = last_entry->descendant_index; + if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { + descendant_index += 1; + } + return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, .child_index = 0, .structural_child_index = 0, + .descendant_index = descendant_index, .alias_sequence = alias_sequence, }; } @@ -46,14 +70,22 @@ static inline bool ts_tree_cursor_child_iterator_next( .position = self->position, .child_index = self->child_index, .structural_child_index = self->structural_child_index, + .descendant_index = self->descendant_index, }; *visible = ts_subtree_visible(*child); bool extra = ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; + if (!extra) { + if (self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + } self->structural_child_index++; } + self->descendant_index += ts_subtree_visible_descendant_count(*child); + if (*visible) { + self->descendant_index += 1; + } + self->position = length_add(self->position, ts_subtree_size(*child)); self->child_index++; @@ -65,10 +97,61 @@ static inline bool ts_tree_cursor_child_iterator_next( return true; } +// Return a position that, when `b` is added to it, yields `a`. This +// can only be computed if `b` has zero rows. Otherwise, this function +// returns `LENGTH_UNDEFINED`, and the caller needs to recompute +// the position some other way. 
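A runnable worked example of the restriction stated above, using simplified stand-ins for `Length` and `Point`; when `b` spans no rows, bytes and columns subtract cleanly, otherwise the caller must recompute the column:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct { uint32_t row, column; } Pt;
typedef struct { uint32_t bytes; Pt extent; } Len;

// Returns false when `b` spans one or more rows: the resulting column would
// belong to an unknown preceding line, so it cannot be derived from `a` alone.
static bool backtrack(Len a, Len b, Len *out) {
  if (b.extent.row != 0) return false;
  out->bytes = a.bytes - b.bytes;
  out->extent.row = a.extent.row;
  out->extent.column = a.extent.column - b.extent.column;
  return true;
}

int main(void) {
  Len a = {120, {3, 15}};  // byte 120, row 3, column 15
  Len b = {5, {0, 5}};     // 5 bytes, same row, 5 columns
  Len r;
  assert(backtrack(a, b, &r));
  assert(r.bytes == 115 && r.extent.row == 3 && r.extent.column == 10);

  Len c = {8, {1, 2}};     // spans a row: the column is unrecoverable
  assert(!backtrack(a, c, &r));
  return 0;
}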
+static inline Length length_backtrack(Length a, Length b) { + if (length_is_undefined(a) || b.extent.row != 0) { + return LENGTH_UNDEFINED; + } + + Length result; + result.bytes = a.bytes - b.bytes; + result.extent.row = a.extent.row; + result.extent.column = a.extent.column - b.extent.column; + return result; +} + +static inline bool ts_tree_cursor_child_iterator_previous( + CursorChildIterator *self, + TreeCursorEntry *result, + bool *visible +) { + // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into + // account unsigned underflow + if (!self->parent.ptr || (int8_t)self->child_index == -1) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + *result = (TreeCursorEntry) { + .subtree = child, + .position = self->position, + .child_index = self->child_index, + .structural_child_index = self->structural_child_index, + }; + *visible = ts_subtree_visible(*child); + bool extra = ts_subtree_extra(*child); + if (!extra && self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + self->structural_child_index--; + } + + self->position = length_backtrack(self->position, ts_subtree_padding(*child)); + self->child_index--; + + // unsigned can underflow so compare it to child_count + if (self->child_index < self->parent.ptr->child_count) { + Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; + Length size = ts_subtree_size(previous_child); + self->position = length_backtrack(self->position, size); + } + + return true; +} + // TSTreeCursor - lifecycle TSTreeCursor ts_tree_cursor_new(TSNode node) { - TSTreeCursor self = {NULL, NULL, {0, 0}}; + TSTreeCursor self = {NULL, NULL, {0, 0, 0}}; ts_tree_cursor_init((TreeCursor *)&self, node); return self; } @@ -79,6 +162,7 @@ void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) { void ts_tree_cursor_init(TreeCursor *self, TSNode node) { self->tree = node.tree; + self->root_alias_symbol = node.context[3]; array_clear(&self->stack); array_push(&self->stack, ((TreeCursorEntry) { .subtree = (const Subtree *)node.id, @@ -88,6 +172,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { }, .child_index = 0, .structural_child_index = 0, + .descendant_index = 0, })); } @@ -130,6 +215,47 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { return false; } +TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { + TreeCursor *self = (TreeCursor *)_self; + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; + + TreeCursorEntry last_entry = {0}; + TreeCursorStep last_step = TreeCursorStepNone; + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + if (visible) { + last_entry = entry; + last_step = TreeCursorStepVisible; + } + else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { + last_entry = entry; + last_step = TreeCursorStepHidden; + } + } + if (last_entry.subtree) { + array_push(&self->stack, last_entry); + return last_step; + } + + return TreeCursorStepNone; +} + +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { + for (;;) { + switch (ts_tree_cursor_goto_last_child_internal(self)) { + case TreeCursorStepHidden: + continue; + case TreeCursorStepVisible: + return true; + default: + return false; + } + } + return false; +} + static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( 
TSTreeCursor *_self,
 uint32_t goal_byte,
@@ -180,7 +306,9 @@ int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint go
 return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point);
 }
-TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
+TreeCursorStep ts_tree_cursor_goto_sibling_internal(
+ TSTreeCursor *_self,
+ bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
 TreeCursor *self = (TreeCursor *)_self;
 uint32_t initial_size = self->stack.size;
@@ -190,12 +318,13 @@ TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
 iterator.child_index = entry.child_index;
 iterator.structural_child_index = entry.structural_child_index;
 iterator.position = entry.position;
+ iterator.descendant_index = entry.descendant_index;
 bool visible = false;
- ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
+ advance(&iterator, &entry, &visible);
 if (visible && self->stack.size + 1 < initial_size) break;
- while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+ while (advance(&iterator, &entry, &visible)) {
 if (visible) {
 array_push(&self->stack, entry);
 return TreeCursorStepVisible;
@@ -212,6 +341,10 @@ TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
 return TreeCursorStepNone;
 }
+TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
+ return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next);
+}
+
 bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
 switch (ts_tree_cursor_goto_next_sibling_internal(self)) {
 case TreeCursorStepHidden:
@@ -224,33 +357,125 @@
 }
 }
+TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
+ // since subtracting across rows loses column information, we may have to
+ // restore it
+ TreeCursor *self = (TreeCursor *)_self;
+
+ // for that, save current position before traversing
+ TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
+ _self, ts_tree_cursor_child_iterator_previous);
+ if (step == TreeCursorStepNone)
+ return step;
+
+ // if length is already valid, there's no need to recompute it
+ if (!length_is_undefined(array_back(&self->stack)->position))
+ return step;
+
+ // restore position from the parent node
+ const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
+ Length position = parent->position;
+ uint32_t child_index = array_back(&self->stack)->child_index;
+ const Subtree *children = ts_subtree_children((*(parent->subtree)));
+
+ if (child_index > 0) {
+ // skip first child padding since its position should match the position of the parent
+ position = length_add(position, ts_subtree_size(children[0]));
+ for (uint32_t i = 1; i < child_index; ++i) {
+ position = length_add(position, ts_subtree_total_size(children[i]));
+ }
+ position = length_add(position, ts_subtree_padding(children[child_index]));
+ }
+
+ array_back(&self->stack)->position = position;
+
+ return step;
+}
+
+bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
+ switch (ts_tree_cursor_goto_previous_sibling_internal(self)) {
+ case TreeCursorStepHidden:
+ ts_tree_cursor_goto_last_child(self);
+ return true;
+ case TreeCursorStepVisible:
+ return true;
+ default:
+ return false;
+ }
+}
+
 bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
 TreeCursor *self = (TreeCursor *)_self;
 for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
 -
TreeCursorEntry *entry = &self->stack.contents[i]; - if (ts_subtree_visible(*entry->subtree)) { + if (ts_tree_cursor_is_entry_visible(self, i)) { self->stack.size = i + 1; return true; } - if (i > 0 && !ts_subtree_extra(*entry->subtree)) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - if (ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - )) { - self->stack.size = i + 1; - return true; - } - } } return false; } +void ts_tree_cursor_goto_descendant( + TSTreeCursor *_self, + uint32_t goal_descendant_index +) { + TreeCursor *self = (TreeCursor *)_self; + + // Ascend to the lowest ancestor that contains the goal node. + for (;;) { + uint32_t i = self->stack.size - 1; + TreeCursorEntry *entry = &self->stack.contents[i]; + uint32_t next_descendant_index = + entry->descendant_index + + (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) + + ts_subtree_visible_descendant_count(*entry->subtree); + if ( + (entry->descendant_index <= goal_descendant_index) && + (next_descendant_index > goal_descendant_index) + ) { + break; + } else if (self->stack.size <= 1) { + return; + } else { + self->stack.size--; + } + } + + // Descend to the goal node. + bool did_descend = true; + do { + did_descend = false; + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + if (iterator.descendant_index > goal_descendant_index) { + return; + } + + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + if (iterator.descendant_index > goal_descendant_index) { + array_push(&self->stack, entry); + if (visible && entry.descendant_index == goal_descendant_index) { + return; + } else { + did_descend = true; + break; + } + } + } + } while (did_descend); +} + +uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *last_entry = array_back(&self->stack); + return last_entry->descendant_index; +} + TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); - TSSymbol alias_symbol = 0; + TSSymbol alias_symbol = self->root_alias_symbol; if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; alias_symbol = ts_language_alias_at( @@ -365,9 +590,9 @@ void ts_tree_cursor_current_status( // Look for a field name associated with the current node. if (!*field_id) { - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (!i->inherited && i->child_index == entry->structural_child_index) { - *field_id = i->field_id; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == entry->structural_child_index) { + *field_id = map->field_id; break; } } @@ -375,10 +600,10 @@ void ts_tree_cursor_current_status( // Determine if the current node can have later siblings with the same field name. 
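A standalone sketch of the two field-map scans above and below, with a simplified entry type standing in for `TSFieldMapEntry`:

#include <stdbool.h>
#include <stdint.h>

typedef struct {
  uint16_t field_id;
  uint8_t child_index;
  bool inherited;
} Entry;

// First scan: the field assigned directly to a structural child, if any.
static uint16_t field_for_child(const Entry *map, const Entry *end,
                                uint8_t child_index) {
  for (const Entry *e = map; e < end; e++) {
    if (!e->inherited && e->child_index == child_index) return e->field_id;
  }
  return 0;
}

// Second scan: whether any later child can carry the same field, which
// decides if the cursor must keep looking among later siblings.
static bool field_repeats_later(const Entry *map, const Entry *end,
                                uint16_t field_id, uint8_t child_index) {
  for (const Entry *e = map; e < end; e++) {
    if (e->field_id == field_id && e->child_index > child_index) return true;
  }
  return false;
}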
if (*field_id) { - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { if ( - i->field_id == *field_id && - i->child_index > entry->structural_child_index + map->field_id == *field_id && + map->child_index > entry->structural_child_index ) { *can_have_later_siblings_with_this_field = true; break; @@ -389,6 +614,17 @@ void ts_tree_cursor_current_status( } } +uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + uint32_t depth = 0; + for (unsigned i = 1; i < self->stack.size; i++) { + if (ts_tree_cursor_is_entry_visible(self, i)) { + depth++; + } + } + return depth; +} + TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; for (int i = (int)self->stack.size - 2; i >= 0; i--) { @@ -425,17 +661,10 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; // Stop walking up when another visible node is found. - if (i != self->stack.size - 1) { - if (ts_subtree_visible(*entry->subtree)) break; - if ( - !ts_subtree_extra(*entry->subtree) && - ts_language_alias_at( - self->tree->language, - parent_entry->subtree->ptr->production_id, - entry->structural_child_index - ) - ) break; - } + if ( + i != self->stack.size - 1 && + ts_tree_cursor_is_entry_visible(self, i) + ) break; if (ts_subtree_extra(*entry->subtree)) break; @@ -445,9 +674,9 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { parent_entry->subtree->ptr->production_id, &field_map, &field_map_end ); - for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (!i->inherited && i->child_index == entry->structural_child_index) { - return i->field_id; + for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) { + if (!map->inherited && map->child_index == entry->structural_child_index) { + return map->field_id; } } } @@ -469,7 +698,17 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { TSTreeCursor res = {NULL, NULL, {0, 0}}; TreeCursor *copy = (TreeCursor *)&res; copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; array_init(©->stack); array_push_all(©->stack, &cursor->stack); return res; } + +void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { + const TreeCursor *cursor = (const TreeCursor *)_src; + TreeCursor *copy = (TreeCursor *)_dst; + copy->tree = cursor->tree; + copy->root_alias_symbol = cursor->root_alias_symbol; + array_clear(©->stack); + array_push_all(©->stack, &cursor->stack); +} diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 7b94db6..96a386d 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -8,11 +8,13 @@ typedef struct { Length position; uint32_t child_index; uint32_t structural_child_index; + uint32_t descendant_index; } TreeCursorEntry; typedef struct { const TSTree *tree; Array(TreeCursorEntry) stack; + TSSymbol root_alias_symbol; } TreeCursor; typedef enum { diff --git a/lib/src/wasm/stdlib-symbols.txt b/lib/src/wasm/stdlib-symbols.txt new file mode 100644 index 0000000..1b6d789 --- /dev/null +++ b/lib/src/wasm/stdlib-symbols.txt @@ -0,0 +1,24 @@ +"calloc", +"free", +"iswalnum", +"iswalpha", +"iswblank", +"iswdigit", +"iswlower", +"iswspace", +"iswupper", +"iswxdigit", +"malloc", +"memchr", +"memcmp", +"memcpy", +"memmove", +"memset", +"realloc", +"strcmp", +"strlen", 
+"strncat", +"strncmp", +"strncpy", +"towlower", +"towupper", diff --git a/lib/src/wasm/stdlib.c b/lib/src/wasm/stdlib.c new file mode 100644 index 0000000..cfe2e4b --- /dev/null +++ b/lib/src/wasm/stdlib.c @@ -0,0 +1,109 @@ +// This file implements a very simple allocator for external scanners running +// in WASM. Allocation is just bumping a static pointer and growing the heap +// as needed, and freeing is mostly a noop. But in the special case of freeing +// the last-allocated pointer, we'll reuse that pointer again. + +#include +#include +#include +#include + +extern void tree_sitter_debug_message(const char *, size_t); + +#define PAGESIZE 0x10000 +#define MAX_HEAP_SIZE (4 * 1024 * 1024) + +typedef struct { + size_t size; + char data[0]; +} Region; + +static Region *heap_end = NULL; +static Region *heap_start = NULL; +static Region *next = NULL; + +// Get the region metadata for the given heap pointer. +static inline Region *region_for_ptr(void *ptr) { + return ((Region *)ptr) - 1; +} + +// Get the location of the next region after the given region, +// if the given region had the given size. +static inline Region *region_after(Region *self, size_t len) { + char *address = self->data + len; + char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3); + return (Region *)aligned; +} + +static void *get_heap_end() { + return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE); +} + +static int grow_heap(size_t size) { + size_t new_page_count = ((size - 1) / PAGESIZE) + 1; + return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX; +} + +// Clear out the heap, and move it to the given address. +void reset_heap(void *new_heap_start) { + heap_start = new_heap_start; + next = new_heap_start; + heap_end = get_heap_end(); +} + +void *malloc(size_t size) { + Region *region_end = region_after(next, size); + + if (region_end > heap_end) { + if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) { + return NULL; + } + if (!grow_heap(size)) return NULL; + heap_end = get_heap_end(); + } + + void *result = &next->data; + next->size = size; + next = region_end; + + return result; +} + +void free(void *ptr) { + if (ptr == NULL) return; + + Region *region = region_for_ptr(ptr); + Region *region_end = region_after(region, region->size); + + // When freeing the last allocated pointer, re-use that + // pointer for the next allocation. + if (region_end == next) { + next = region; + } +} + +void *calloc(size_t count, size_t size) { + void *result = malloc(count * size); + memset(result, 0, count * size); + return result; +} + +void *realloc(void *ptr, size_t new_size) { + if (ptr == NULL) { + return malloc(new_size); + } + + Region *region = region_for_ptr(ptr); + Region *region_end = region_after(region, region->size); + + // When reallocating the last allocated region, return + // the same pointer, and skip copying the data. 
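A hypothetical usage sketch for this allocator (not part of the patch), illustrating the one fast path it supports; `scanner_example` and its pointers are made-up names:

#include <stdlib.h>  // malloc/free prototypes, satisfied by this file's definitions

static void scanner_example(void) {
  char *a = malloc(16);   // bump: the heap pointer advances past a new region
  char *b = malloc(16);   // bump again
  free(a);                // no-op: `a` is not the most recent allocation
  free(b);                // reclaimed: the bump pointer moves back to `b`
  char *c = malloc(32);   // may land exactly where `b` was
  free(c);
  (void)a;
}

Everything that is not the most recent allocation simply leaks until `reset_heap` is called, which is acceptable for short-lived external-scanner runs.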
+ if (region_end == next) { + next = region; + return malloc(new_size); + } + + void *result = malloc(new_size); + memcpy(result, &region->data, region->size); + return result; +} diff --git a/lib/src/wasm/wasm-stdlib.h b/lib/src/wasm/wasm-stdlib.h new file mode 100644 index 0000000..c1f3bc0 --- /dev/null +++ b/lib/src/wasm/wasm-stdlib.h @@ -0,0 +1,1302 @@ +unsigned char STDLIB_WASM[] = { + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x1e, 0x06, 0x60, + 0x02, 0x7f, 0x7f, 0x01, 0x7f, 0x60, 0x01, 0x7f, 0x00, 0x60, 0x00, 0x00, + 0x60, 0x01, 0x7f, 0x01, 0x7f, 0x60, 0x00, 0x01, 0x7f, 0x60, 0x03, 0x7f, + 0x7f, 0x7f, 0x01, 0x7f, 0x02, 0x9e, 0x01, 0x05, 0x03, 0x65, 0x6e, 0x76, + 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x02, 0x00, 0x02, 0x03, 0x65, + 0x6e, 0x76, 0x19, 0x5f, 0x5f, 0x69, 0x6e, 0x64, 0x69, 0x72, 0x65, 0x63, + 0x74, 0x5f, 0x66, 0x75, 0x6e, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, + 0x61, 0x62, 0x6c, 0x65, 0x01, 0x70, 0x00, 0x01, 0x16, 0x77, 0x61, 0x73, + 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, + 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31, 0x08, 0x61, 0x72, 0x67, 0x73, + 0x5f, 0x67, 0x65, 0x74, 0x00, 0x00, 0x16, 0x77, 0x61, 0x73, 0x69, 0x5f, + 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, + 0x76, 0x69, 0x65, 0x77, 0x31, 0x0e, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x73, + 0x69, 0x7a, 0x65, 0x73, 0x5f, 0x67, 0x65, 0x74, 0x00, 0x00, 0x16, 0x77, + 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, + 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31, 0x09, 0x70, 0x72, + 0x6f, 0x63, 0x5f, 0x65, 0x78, 0x69, 0x74, 0x00, 0x01, 0x03, 0x2a, 0x29, + 0x02, 0x00, 0x02, 0x02, 0x01, 0x03, 0x01, 0x00, 0x00, 0x01, 0x04, 0x00, + 0x00, 0x01, 0x02, 0x02, 0x05, 0x05, 0x03, 0x03, 0x05, 0x05, 0x00, 0x03, + 0x00, 0x03, 0x05, 0x03, 0x05, 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x05, + 0x03, 0x03, 0x00, 0x03, 0x03, 0x06, 0x0d, 0x02, 0x7f, 0x01, 0x41, 0x80, + 0x80, 0x04, 0x0b, 0x7f, 0x00, 0x41, 0x00, 0x0b, 0x07, 0xad, 0x02, 0x1c, + 0x11, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x6d, 0x5f, 0x63, 0x61, 0x6c, 0x6c, + 0x5f, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x00, 0x03, 0x0f, 0x5f, 0x5f, 0x73, + 0x74, 0x61, 0x63, 0x6b, 0x5f, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x03, 0x00, 0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x00, 0x06, 0x0a, + 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x68, 0x65, 0x61, 0x70, 0x00, 0x07, + 0x06, 0x6d, 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x00, 0x08, 0x04, 0x66, 0x72, + 0x65, 0x65, 0x00, 0x09, 0x06, 0x63, 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x00, + 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x73, 0x65, 0x74, 0x00, 0x14, 0x07, 0x72, + 0x65, 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x00, 0x0b, 0x06, 0x6d, 0x65, 0x6d, + 0x63, 0x70, 0x79, 0x00, 0x13, 0x06, 0x73, 0x74, 0x72, 0x6c, 0x65, 0x6e, + 0x00, 0x15, 0x08, 0x69, 0x73, 0x77, 0x61, 0x6c, 0x6e, 0x75, 0x6d, 0x00, + 0x2b, 0x08, 0x69, 0x73, 0x77, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x00, 0x16, + 0x08, 0x69, 0x73, 0x77, 0x62, 0x6c, 0x61, 0x6e, 0x6b, 0x00, 0x22, 0x08, + 0x69, 0x73, 0x77, 0x64, 0x69, 0x67, 0x69, 0x74, 0x00, 0x23, 0x08, 0x69, + 0x73, 0x77, 0x6c, 0x6f, 0x77, 0x65, 0x72, 0x00, 0x20, 0x08, 0x69, 0x73, + 0x77, 0x73, 0x70, 0x61, 0x63, 0x65, 0x00, 0x2a, 0x08, 0x69, 0x73, 0x77, + 0x75, 0x70, 0x70, 0x65, 0x72, 0x00, 0x1e, 0x09, 0x69, 0x73, 0x77, 0x78, + 0x64, 0x69, 0x67, 0x69, 0x74, 0x00, 0x27, 0x08, 0x74, 0x6f, 0x77, 0x6c, + 0x6f, 0x77, 0x65, 0x72, 0x00, 0x1a, 0x08, 0x74, 0x6f, 0x77, 0x75, 0x70, + 0x70, 0x65, 0x72, 0x00, 0x1c, 0x06, 0x6d, 0x65, 0x6d, 0x63, 0x68, 0x72, + 0x00, 0x18, 0x06, 0x6d, 0x65, 0x6d, 0x63, 0x6d, 0x70,
0x00, 0x17, 0x07, + 0x6d, 0x65, 0x6d, 0x6d, 0x6f, 0x76, 0x65, 0x00, 0x1f, 0x06, 0x73, 0x74, + 0x72, 0x63, 0x6d, 0x70, 0x00, 0x19, 0x07, 0x73, 0x74, 0x72, 0x6e, 0x63, + 0x61, 0x74, 0x00, 0x24, 0x07, 0x73, 0x74, 0x72, 0x6e, 0x63, 0x6d, 0x70, + 0x00, 0x1d, 0x07, 0x73, 0x74, 0x72, 0x6e, 0x63, 0x70, 0x79, 0x00, 0x26, + 0x08, 0x01, 0x05, 0x0a, 0xe8, 0x2b, 0x29, 0x02, 0x00, 0x0b, 0x03, 0x00, + 0x00, 0x0b, 0x0d, 0x00, 0x41, 0xe8, 0xc2, 0x04, 0x41, 0x00, 0x41, 0x10, + 0xfc, 0x0b, 0x00, 0x0b, 0x52, 0x01, 0x01, 0x7f, 0x02, 0x40, 0x02, 0x40, + 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, 0xe8, 0xc2, 0x84, 0x80, 0x00, + 0x6a, 0x28, 0x02, 0x00, 0x0d, 0x00, 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, + 0x41, 0xe8, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x41, 0x01, 0x36, 0x02, 0x00, + 0x10, 0x83, 0x80, 0x80, 0x80, 0x00, 0x10, 0x8d, 0x80, 0x80, 0x80, 0x00, + 0x21, 0x00, 0x10, 0x92, 0x80, 0x80, 0x80, 0x00, 0x20, 0x00, 0x0d, 0x01, + 0x0f, 0x0b, 0x00, 0x00, 0x0b, 0x20, 0x00, 0x10, 0x90, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x0b, 0x37, 0x01, 0x01, 0x7f, 0x23, 0x81, 0x80, 0x80, 0x80, + 0x00, 0x22, 0x01, 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x20, 0x00, + 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0xec, 0xc2, 0x84, 0x80, 0x00, 0x6a, + 0x20, 0x00, 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0xf4, 0xc2, 0x84, 0x80, + 0x00, 0x6a, 0x3f, 0x00, 0x41, 0x10, 0x74, 0x36, 0x02, 0x00, 0x0b, 0xb4, + 0x01, 0x01, 0x03, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x23, 0x81, 0x80, 0x80, + 0x80, 0x00, 0x22, 0x01, 0x41, 0xf4, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x28, + 0x02, 0x00, 0x20, 0x01, 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x28, + 0x02, 0x00, 0x22, 0x01, 0x20, 0x00, 0x6a, 0x41, 0x07, 0x6a, 0x41, 0x7c, + 0x71, 0x22, 0x02, 0x4f, 0x0d, 0x00, 0x41, 0x00, 0x21, 0x01, 0x20, 0x02, + 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, 0xec, 0xc2, 0x84, 0x80, 0x00, + 0x6a, 0x28, 0x02, 0x00, 0x6b, 0x41, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x0d, + 0x01, 0x20, 0x00, 0x41, 0x7f, 0x6a, 0x41, 0x10, 0x76, 0x41, 0x01, 0x6a, + 0x40, 0x00, 0x41, 0x7f, 0x46, 0x0d, 0x01, 0x3f, 0x00, 0x21, 0x01, 0x23, + 0x81, 0x80, 0x80, 0x80, 0x00, 0x22, 0x03, 0x41, 0xf4, 0xc2, 0x84, 0x80, + 0x00, 0x6a, 0x20, 0x01, 0x41, 0x10, 0x74, 0x36, 0x02, 0x00, 0x20, 0x03, + 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x28, 0x02, 0x00, 0x21, 0x01, + 0x0b, 0x20, 0x01, 0x20, 0x00, 0x36, 0x02, 0x00, 0x23, 0x81, 0x80, 0x80, + 0x80, 0x00, 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x20, 0x02, 0x36, + 0x02, 0x00, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x21, 0x01, 0x0b, 0x20, 0x01, + 0x0b, 0x48, 0x01, 0x02, 0x7f, 0x02, 0x40, 0x20, 0x00, 0x45, 0x0d, 0x00, + 0x20, 0x00, 0x41, 0x7c, 0x6a, 0x22, 0x01, 0x28, 0x02, 0x00, 0x21, 0x02, + 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, + 0x6a, 0x28, 0x02, 0x00, 0x20, 0x00, 0x20, 0x02, 0x6a, 0x41, 0x03, 0x6a, + 0x41, 0x7c, 0x71, 0x47, 0x0d, 0x00, 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, + 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x20, 0x01, 0x36, 0x02, 0x00, + 0x0b, 0x0b, 0x19, 0x00, 0x20, 0x01, 0x20, 0x00, 0x6c, 0x22, 0x00, 0x10, + 0x88, 0x80, 0x80, 0x80, 0x00, 0x41, 0x00, 0x20, 0x00, 0x10, 0x94, 0x80, + 0x80, 0x80, 0x00, 0x0b, 0x6b, 0x01, 0x02, 0x7f, 0x02, 0x40, 0x20, 0x00, + 0x45, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x7c, 0x6a, 0x22, 0x02, 0x28, 0x02, + 0x00, 0x21, 0x03, 0x02, 0x40, 0x23, 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, + 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, 0x28, 0x02, 0x00, 0x20, 0x00, 0x20, + 0x03, 0x6a, 0x41, 0x03, 0x6a, 0x41, 0x7c, 0x71, 0x47, 0x0d, 0x00, 0x23, + 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, 0xf0, 0xc2, 0x84, 0x80, 0x00, 0x6a, + 0x20, 0x02, 0x36, 0x02, 0x00, 0x0c, 0x01, 0x0b, 0x20, 
0x01, 0x10, 0x88, + 0x80, 0x80, 0x80, 0x00, 0x20, 0x00, 0x20, 0x02, 0x28, 0x02, 0x00, 0x10, + 0x93, 0x80, 0x80, 0x80, 0x00, 0x0f, 0x0b, 0x20, 0x01, 0x10, 0x88, 0x80, + 0x80, 0x80, 0x00, 0x0b, 0x0b, 0x00, 0x20, 0x00, 0x10, 0x90, 0x80, 0x80, + 0x80, 0x00, 0x00, 0x0b, 0xd5, 0x01, 0x01, 0x03, 0x7f, 0x23, 0x80, 0x80, + 0x80, 0x80, 0x00, 0x41, 0x10, 0x6b, 0x22, 0x00, 0x24, 0x80, 0x80, 0x80, + 0x80, 0x00, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, + 0x20, 0x00, 0x41, 0x08, 0x6a, 0x20, 0x00, 0x41, 0x0c, 0x6a, 0x10, 0x8f, + 0x80, 0x80, 0x80, 0x00, 0x0d, 0x00, 0x20, 0x00, 0x28, 0x02, 0x08, 0x41, + 0x01, 0x6a, 0x22, 0x01, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x28, 0x02, 0x0c, + 0x10, 0x88, 0x80, 0x80, 0x80, 0x00, 0x22, 0x02, 0x45, 0x0d, 0x02, 0x20, + 0x01, 0x41, 0x04, 0x10, 0x8a, 0x80, 0x80, 0x80, 0x00, 0x22, 0x01, 0x45, + 0x0d, 0x03, 0x20, 0x01, 0x20, 0x02, 0x10, 0x8e, 0x80, 0x80, 0x80, 0x00, + 0x0d, 0x04, 0x20, 0x00, 0x28, 0x02, 0x08, 0x20, 0x01, 0x10, 0x84, 0x80, + 0x80, 0x80, 0x00, 0x21, 0x01, 0x20, 0x00, 0x41, 0x10, 0x6a, 0x24, 0x80, + 0x80, 0x80, 0x80, 0x00, 0x20, 0x01, 0x0f, 0x0b, 0x41, 0xc7, 0x00, 0x10, + 0x8c, 0x80, 0x80, 0x80, 0x00, 0x00, 0x0b, 0x41, 0xc6, 0x00, 0x10, 0x8c, + 0x80, 0x80, 0x80, 0x00, 0x00, 0x0b, 0x41, 0xc6, 0x00, 0x10, 0x8c, 0x80, + 0x80, 0x80, 0x00, 0x00, 0x0b, 0x20, 0x02, 0x10, 0x89, 0x80, 0x80, 0x80, + 0x00, 0x41, 0xc6, 0x00, 0x10, 0x8c, 0x80, 0x80, 0x80, 0x00, 0x00, 0x0b, + 0x20, 0x02, 0x10, 0x89, 0x80, 0x80, 0x80, 0x00, 0x20, 0x01, 0x10, 0x89, + 0x80, 0x80, 0x80, 0x00, 0x41, 0xc7, 0x00, 0x10, 0x8c, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x0b, 0x11, 0x00, 0x20, 0x00, 0x20, 0x01, 0x10, 0x80, 0x80, + 0x80, 0x80, 0x00, 0x41, 0xff, 0xff, 0x03, 0x71, 0x0b, 0x11, 0x00, 0x20, + 0x00, 0x20, 0x01, 0x10, 0x81, 0x80, 0x80, 0x80, 0x00, 0x41, 0xff, 0xff, + 0x03, 0x71, 0x0b, 0x0b, 0x00, 0x20, 0x00, 0x10, 0x82, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x0b, 0x02, 0x00, 0x0b, 0x0e, 0x00, 0x10, 0x91, 0x80, 0x80, + 0x80, 0x00, 0x10, 0x91, 0x80, 0x80, 0x80, 0x00, 0x0b, 0xe6, 0x07, 0x01, + 0x04, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x20, 0x02, 0x41, 0x20, + 0x4b, 0x0d, 0x00, 0x20, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x01, 0x20, + 0x02, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, + 0x00, 0x00, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x21, 0x03, 0x20, 0x00, 0x41, + 0x01, 0x6a, 0x21, 0x04, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x22, 0x05, 0x41, + 0x03, 0x71, 0x45, 0x0d, 0x02, 0x20, 0x03, 0x45, 0x0d, 0x02, 0x20, 0x00, + 0x20, 0x01, 0x2d, 0x00, 0x01, 0x3a, 0x00, 0x01, 0x20, 0x02, 0x41, 0x7e, + 0x6a, 0x21, 0x03, 0x20, 0x00, 0x41, 0x02, 0x6a, 0x21, 0x04, 0x20, 0x01, + 0x41, 0x02, 0x6a, 0x22, 0x05, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x02, 0x20, + 0x03, 0x45, 0x0d, 0x02, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x02, 0x3a, + 0x00, 0x02, 0x20, 0x02, 0x41, 0x7d, 0x6a, 0x21, 0x03, 0x20, 0x00, 0x41, + 0x03, 0x6a, 0x21, 0x04, 0x20, 0x01, 0x41, 0x03, 0x6a, 0x22, 0x05, 0x41, + 0x03, 0x71, 0x45, 0x0d, 0x02, 0x20, 0x03, 0x45, 0x0d, 0x02, 0x20, 0x00, + 0x20, 0x01, 0x2d, 0x00, 0x03, 0x3a, 0x00, 0x03, 0x20, 0x02, 0x41, 0x7c, + 0x6a, 0x21, 0x03, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x21, 0x04, 0x20, 0x01, + 0x41, 0x04, 0x6a, 0x21, 0x05, 0x0c, 0x02, 0x0b, 0x20, 0x00, 0x20, 0x01, + 0x20, 0x02, 0xfc, 0x0a, 0x00, 0x00, 0x20, 0x00, 0x0f, 0x0b, 0x20, 0x02, + 0x21, 0x03, 0x20, 0x00, 0x21, 0x04, 0x20, 0x01, 0x21, 0x05, 0x0b, 0x02, + 0x40, 0x02, 0x40, 0x20, 0x04, 0x41, 0x03, 0x71, 0x22, 0x02, 0x0d, 0x00, + 0x02, 0x40, 0x02, 0x40, 0x20, 0x03, 0x41, 0x10, 0x4f, 0x0d, 0x00, 0x20, + 0x03, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x02, 0x40, 0x20, 
0x03, 0x41, 0x70, + 0x6a, 0x22, 0x02, 0x41, 0x10, 0x71, 0x0d, 0x00, 0x20, 0x04, 0x20, 0x05, + 0x29, 0x02, 0x00, 0x37, 0x02, 0x00, 0x20, 0x04, 0x20, 0x05, 0x29, 0x02, + 0x08, 0x37, 0x02, 0x08, 0x20, 0x04, 0x41, 0x10, 0x6a, 0x21, 0x04, 0x20, + 0x05, 0x41, 0x10, 0x6a, 0x21, 0x05, 0x20, 0x02, 0x21, 0x03, 0x0b, 0x20, + 0x02, 0x41, 0x10, 0x49, 0x0d, 0x00, 0x20, 0x03, 0x21, 0x02, 0x03, 0x40, + 0x20, 0x04, 0x20, 0x05, 0x29, 0x02, 0x00, 0x37, 0x02, 0x00, 0x20, 0x04, + 0x20, 0x05, 0x29, 0x02, 0x08, 0x37, 0x02, 0x08, 0x20, 0x04, 0x20, 0x05, + 0x29, 0x02, 0x10, 0x37, 0x02, 0x10, 0x20, 0x04, 0x20, 0x05, 0x29, 0x02, + 0x18, 0x37, 0x02, 0x18, 0x20, 0x04, 0x41, 0x20, 0x6a, 0x21, 0x04, 0x20, + 0x05, 0x41, 0x20, 0x6a, 0x21, 0x05, 0x20, 0x02, 0x41, 0x60, 0x6a, 0x22, + 0x02, 0x41, 0x0f, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x02, 0x40, 0x20, 0x02, + 0x41, 0x08, 0x49, 0x0d, 0x00, 0x20, 0x04, 0x20, 0x05, 0x29, 0x02, 0x00, + 0x37, 0x02, 0x00, 0x20, 0x05, 0x41, 0x08, 0x6a, 0x21, 0x05, 0x20, 0x04, + 0x41, 0x08, 0x6a, 0x21, 0x04, 0x0b, 0x02, 0x40, 0x20, 0x02, 0x41, 0x04, + 0x71, 0x45, 0x0d, 0x00, 0x20, 0x04, 0x20, 0x05, 0x28, 0x02, 0x00, 0x36, + 0x02, 0x00, 0x20, 0x05, 0x41, 0x04, 0x6a, 0x21, 0x05, 0x20, 0x04, 0x41, + 0x04, 0x6a, 0x21, 0x04, 0x0b, 0x02, 0x40, 0x20, 0x02, 0x41, 0x02, 0x71, + 0x45, 0x0d, 0x00, 0x20, 0x04, 0x20, 0x05, 0x2f, 0x00, 0x00, 0x3b, 0x00, + 0x00, 0x20, 0x04, 0x41, 0x02, 0x6a, 0x21, 0x04, 0x20, 0x05, 0x41, 0x02, + 0x6a, 0x21, 0x05, 0x0b, 0x20, 0x02, 0x41, 0x01, 0x71, 0x45, 0x0d, 0x01, + 0x20, 0x04, 0x20, 0x05, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x00, + 0x0f, 0x0b, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, + 0x20, 0x03, 0x41, 0x20, 0x49, 0x0d, 0x00, 0x02, 0x40, 0x02, 0x40, 0x20, + 0x02, 0x41, 0x7f, 0x6a, 0x0e, 0x03, 0x03, 0x00, 0x01, 0x07, 0x0b, 0x20, + 0x04, 0x20, 0x05, 0x28, 0x02, 0x00, 0x3b, 0x00, 0x00, 0x20, 0x04, 0x20, + 0x05, 0x41, 0x02, 0x6a, 0x28, 0x01, 0x00, 0x36, 0x02, 0x02, 0x20, 0x04, + 0x20, 0x05, 0x41, 0x06, 0x6a, 0x29, 0x01, 0x00, 0x37, 0x02, 0x06, 0x20, + 0x04, 0x41, 0x12, 0x6a, 0x21, 0x02, 0x20, 0x05, 0x41, 0x12, 0x6a, 0x21, + 0x01, 0x41, 0x0e, 0x21, 0x06, 0x20, 0x05, 0x41, 0x0e, 0x6a, 0x28, 0x01, + 0x00, 0x21, 0x05, 0x41, 0x0e, 0x21, 0x03, 0x0c, 0x03, 0x0b, 0x20, 0x04, + 0x20, 0x05, 0x28, 0x02, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x04, 0x20, 0x05, + 0x41, 0x01, 0x6a, 0x28, 0x00, 0x00, 0x36, 0x02, 0x01, 0x20, 0x04, 0x20, + 0x05, 0x41, 0x05, 0x6a, 0x29, 0x00, 0x00, 0x37, 0x02, 0x05, 0x20, 0x04, + 0x41, 0x11, 0x6a, 0x21, 0x02, 0x20, 0x05, 0x41, 0x11, 0x6a, 0x21, 0x01, + 0x41, 0x0d, 0x21, 0x06, 0x20, 0x05, 0x41, 0x0d, 0x6a, 0x28, 0x00, 0x00, + 0x21, 0x05, 0x41, 0x0f, 0x21, 0x03, 0x0c, 0x02, 0x0b, 0x02, 0x40, 0x02, + 0x40, 0x20, 0x03, 0x41, 0x10, 0x4f, 0x0d, 0x00, 0x20, 0x04, 0x21, 0x02, + 0x20, 0x05, 0x21, 0x01, 0x0c, 0x01, 0x0b, 0x20, 0x04, 0x20, 0x05, 0x2d, + 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x04, 0x20, 0x05, 0x28, 0x00, 0x01, + 0x36, 0x00, 0x01, 0x20, 0x04, 0x20, 0x05, 0x29, 0x00, 0x05, 0x37, 0x00, + 0x05, 0x20, 0x04, 0x20, 0x05, 0x2f, 0x00, 0x0d, 0x3b, 0x00, 0x0d, 0x20, + 0x04, 0x20, 0x05, 0x2d, 0x00, 0x0f, 0x3a, 0x00, 0x0f, 0x20, 0x04, 0x41, + 0x10, 0x6a, 0x21, 0x02, 0x20, 0x05, 0x41, 0x10, 0x6a, 0x21, 0x01, 0x0b, + 0x20, 0x03, 0x41, 0x08, 0x71, 0x0d, 0x02, 0x0c, 0x03, 0x0b, 0x20, 0x04, + 0x20, 0x05, 0x28, 0x02, 0x00, 0x22, 0x02, 0x3a, 0x00, 0x00, 0x20, 0x04, + 0x20, 0x02, 0x41, 0x10, 0x76, 0x3a, 0x00, 0x02, 0x20, 0x04, 0x20, 0x02, + 0x41, 0x08, 0x76, 0x3a, 0x00, 0x01, 0x20, 0x04, 0x20, 0x05, 0x41, 0x03, + 0x6a, 0x28, 0x00, 0x00, 0x36, 0x02, 0x03, 0x20, 0x04, 
0x20, 0x05, 0x41, + 0x07, 0x6a, 0x29, 0x00, 0x00, 0x37, 0x02, 0x07, 0x20, 0x04, 0x41, 0x13, + 0x6a, 0x21, 0x02, 0x20, 0x05, 0x41, 0x13, 0x6a, 0x21, 0x01, 0x41, 0x0f, + 0x21, 0x06, 0x20, 0x05, 0x41, 0x0f, 0x6a, 0x28, 0x00, 0x00, 0x21, 0x05, + 0x41, 0x0d, 0x21, 0x03, 0x0b, 0x20, 0x04, 0x20, 0x06, 0x6a, 0x20, 0x05, + 0x36, 0x02, 0x00, 0x0b, 0x20, 0x02, 0x20, 0x01, 0x29, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x20, 0x02, 0x41, 0x08, 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, + 0x08, 0x6a, 0x21, 0x01, 0x0b, 0x02, 0x40, 0x20, 0x03, 0x41, 0x04, 0x71, + 0x45, 0x0d, 0x00, 0x20, 0x02, 0x20, 0x01, 0x28, 0x00, 0x00, 0x36, 0x00, + 0x00, 0x20, 0x02, 0x41, 0x04, 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x04, + 0x6a, 0x21, 0x01, 0x0b, 0x02, 0x40, 0x20, 0x03, 0x41, 0x02, 0x71, 0x45, + 0x0d, 0x00, 0x20, 0x02, 0x20, 0x01, 0x2f, 0x00, 0x00, 0x3b, 0x00, 0x00, + 0x20, 0x02, 0x41, 0x02, 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x02, 0x6a, + 0x21, 0x01, 0x0b, 0x20, 0x03, 0x41, 0x01, 0x71, 0x45, 0x0d, 0x00, 0x20, + 0x02, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x0b, 0x20, 0x00, + 0x0b, 0x88, 0x03, 0x02, 0x03, 0x7f, 0x01, 0x7e, 0x02, 0x40, 0x20, 0x02, + 0x41, 0x21, 0x49, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x01, 0x20, 0x02, 0xfc, + 0x0b, 0x00, 0x20, 0x00, 0x0f, 0x0b, 0x02, 0x40, 0x20, 0x02, 0x45, 0x0d, + 0x00, 0x20, 0x00, 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, 0x20, 0x00, + 0x6a, 0x22, 0x03, 0x41, 0x7f, 0x6a, 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, + 0x02, 0x41, 0x03, 0x49, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x01, 0x3a, 0x00, + 0x02, 0x20, 0x00, 0x20, 0x01, 0x3a, 0x00, 0x01, 0x20, 0x03, 0x41, 0x7d, + 0x6a, 0x20, 0x01, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x7e, 0x6a, 0x20, + 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, 0x41, 0x07, 0x49, 0x0d, 0x00, 0x20, + 0x00, 0x20, 0x01, 0x3a, 0x00, 0x03, 0x20, 0x03, 0x41, 0x7c, 0x6a, 0x20, + 0x01, 0x3a, 0x00, 0x00, 0x20, 0x02, 0x41, 0x09, 0x49, 0x0d, 0x00, 0x20, + 0x00, 0x41, 0x00, 0x20, 0x00, 0x6b, 0x41, 0x03, 0x71, 0x22, 0x04, 0x6a, + 0x22, 0x05, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x41, 0x81, 0x82, 0x84, + 0x08, 0x6c, 0x22, 0x03, 0x36, 0x02, 0x00, 0x20, 0x05, 0x20, 0x02, 0x20, + 0x04, 0x6b, 0x41, 0x7c, 0x71, 0x22, 0x01, 0x6a, 0x22, 0x02, 0x41, 0x7c, + 0x6a, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x01, 0x41, 0x09, 0x49, 0x0d, + 0x00, 0x20, 0x05, 0x20, 0x03, 0x36, 0x02, 0x08, 0x20, 0x05, 0x20, 0x03, + 0x36, 0x02, 0x04, 0x20, 0x02, 0x41, 0x78, 0x6a, 0x20, 0x03, 0x36, 0x02, + 0x00, 0x20, 0x02, 0x41, 0x74, 0x6a, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, + 0x01, 0x41, 0x19, 0x49, 0x0d, 0x00, 0x20, 0x05, 0x20, 0x03, 0x36, 0x02, + 0x18, 0x20, 0x05, 0x20, 0x03, 0x36, 0x02, 0x14, 0x20, 0x05, 0x20, 0x03, + 0x36, 0x02, 0x10, 0x20, 0x05, 0x20, 0x03, 0x36, 0x02, 0x0c, 0x20, 0x02, + 0x41, 0x70, 0x6a, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x6c, + 0x6a, 0x20, 0x03, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x68, 0x6a, 0x20, + 0x03, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x64, 0x6a, 0x20, 0x03, 0x36, + 0x02, 0x00, 0x20, 0x01, 0x20, 0x05, 0x41, 0x04, 0x71, 0x41, 0x18, 0x72, + 0x22, 0x02, 0x6b, 0x22, 0x01, 0x41, 0x20, 0x49, 0x0d, 0x00, 0x20, 0x03, + 0xad, 0x42, 0x81, 0x80, 0x80, 0x80, 0x10, 0x7e, 0x21, 0x06, 0x20, 0x05, + 0x20, 0x02, 0x6a, 0x21, 0x02, 0x03, 0x40, 0x20, 0x02, 0x20, 0x06, 0x37, + 0x03, 0x18, 0x20, 0x02, 0x20, 0x06, 0x37, 0x03, 0x10, 0x20, 0x02, 0x20, + 0x06, 0x37, 0x03, 0x08, 0x20, 0x02, 0x20, 0x06, 0x37, 0x03, 0x00, 0x20, + 0x02, 0x41, 0x20, 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x60, 0x6a, 0x22, + 0x01, 0x41, 0x1f, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x00, 0x0b, 0xcc, + 0x01, 0x01, 0x03, 0x7f, 0x20, 0x00, 0x21, 0x01, 0x02, 
0x40, 0x02, 0x40, + 0x20, 0x00, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x02, 0x40, 0x20, 0x00, + 0x2d, 0x00, 0x00, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x00, 0x6b, 0x0f, 0x0b, + 0x20, 0x00, 0x41, 0x01, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, + 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, + 0x02, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x01, + 0x2d, 0x00, 0x00, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x03, 0x6a, 0x22, + 0x01, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, + 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x22, 0x01, 0x41, 0x03, + 0x71, 0x0d, 0x01, 0x0b, 0x20, 0x01, 0x41, 0x7c, 0x6a, 0x21, 0x02, 0x20, + 0x01, 0x41, 0x7b, 0x6a, 0x21, 0x01, 0x03, 0x40, 0x20, 0x01, 0x41, 0x04, + 0x6a, 0x21, 0x01, 0x20, 0x02, 0x41, 0x04, 0x6a, 0x22, 0x02, 0x28, 0x02, + 0x00, 0x22, 0x03, 0x41, 0x7f, 0x73, 0x20, 0x03, 0x41, 0xff, 0xfd, 0xfb, + 0x77, 0x6a, 0x71, 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x71, 0x45, 0x0d, + 0x00, 0x0b, 0x03, 0x40, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, + 0x02, 0x2d, 0x00, 0x00, 0x21, 0x03, 0x20, 0x02, 0x41, 0x01, 0x6a, 0x21, + 0x02, 0x20, 0x03, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x01, 0x20, 0x00, 0x6b, + 0x0b, 0x44, 0x00, 0x02, 0x40, 0x20, 0x00, 0x41, 0xff, 0xff, 0x07, 0x4b, + 0x0d, 0x00, 0x20, 0x00, 0x41, 0x08, 0x76, 0x41, 0x80, 0x80, 0x84, 0x80, + 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x41, 0x05, 0x74, 0x20, 0x00, 0x41, 0x03, + 0x76, 0x41, 0x1f, 0x71, 0x72, 0x41, 0x80, 0x80, 0x84, 0x80, 0x00, 0x6a, + 0x2d, 0x00, 0x00, 0x20, 0x00, 0x41, 0x07, 0x71, 0x76, 0x41, 0x01, 0x71, + 0x0f, 0x0b, 0x20, 0x00, 0x41, 0xfe, 0xff, 0x0b, 0x49, 0x0b, 0x49, 0x01, + 0x03, 0x7f, 0x41, 0x00, 0x21, 0x03, 0x02, 0x40, 0x20, 0x02, 0x45, 0x0d, + 0x00, 0x02, 0x40, 0x03, 0x40, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x22, 0x04, + 0x20, 0x01, 0x2d, 0x00, 0x00, 0x22, 0x05, 0x47, 0x0d, 0x01, 0x20, 0x01, + 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, + 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x22, 0x02, 0x0d, 0x00, 0x0c, 0x02, 0x0b, + 0x0b, 0x20, 0x04, 0x20, 0x05, 0x6b, 0x21, 0x03, 0x0b, 0x20, 0x03, 0x0b, + 0xf2, 0x02, 0x01, 0x03, 0x7f, 0x20, 0x02, 0x41, 0x00, 0x47, 0x21, 0x03, + 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x20, 0x00, 0x41, 0x03, + 0x71, 0x45, 0x0d, 0x00, 0x20, 0x02, 0x45, 0x0d, 0x00, 0x02, 0x40, 0x20, + 0x00, 0x2d, 0x00, 0x00, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x47, 0x0d, + 0x00, 0x20, 0x00, 0x21, 0x04, 0x20, 0x02, 0x21, 0x05, 0x0c, 0x03, 0x0b, + 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x22, 0x05, 0x41, 0x00, 0x47, 0x21, 0x03, + 0x20, 0x00, 0x41, 0x01, 0x6a, 0x22, 0x04, 0x41, 0x03, 0x71, 0x45, 0x0d, + 0x01, 0x20, 0x05, 0x45, 0x0d, 0x01, 0x20, 0x04, 0x2d, 0x00, 0x00, 0x20, + 0x01, 0x41, 0xff, 0x01, 0x71, 0x46, 0x0d, 0x02, 0x20, 0x02, 0x41, 0x7e, + 0x6a, 0x22, 0x05, 0x41, 0x00, 0x47, 0x21, 0x03, 0x20, 0x00, 0x41, 0x02, + 0x6a, 0x22, 0x04, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x01, 0x20, 0x05, 0x45, + 0x0d, 0x01, 0x20, 0x04, 0x2d, 0x00, 0x00, 0x20, 0x01, 0x41, 0xff, 0x01, + 0x71, 0x46, 0x0d, 0x02, 0x20, 0x02, 0x41, 0x7d, 0x6a, 0x22, 0x05, 0x41, + 0x00, 0x47, 0x21, 0x03, 0x20, 0x00, 0x41, 0x03, 0x6a, 0x22, 0x04, 0x41, + 0x03, 0x71, 0x45, 0x0d, 0x01, 0x20, 0x05, 0x45, 0x0d, 0x01, 0x20, 0x04, + 0x2d, 0x00, 0x00, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x46, 0x0d, 0x02, + 0x20, 0x00, 0x41, 0x04, 0x6a, 0x21, 0x04, 0x20, 0x02, 0x41, 0x7c, 0x6a, + 0x22, 0x05, 0x41, 0x00, 0x47, 0x21, 0x03, 0x0c, 0x01, 0x0b, 0x20, 0x02, + 0x21, 0x05, 0x20, 0x00, 0x21, 0x04, 0x0b, 0x20, 0x03, 0x45, 0x0d, 0x01, + 0x02, 0x40, 0x20, 0x04, 0x2d, 0x00, 0x00, 0x20, 0x01, 
0x41, 0xff, 0x01, + 0x71, 0x46, 0x0d, 0x00, 0x20, 0x05, 0x41, 0x04, 0x49, 0x0d, 0x00, 0x20, + 0x01, 0x41, 0xff, 0x01, 0x71, 0x41, 0x81, 0x82, 0x84, 0x08, 0x6c, 0x21, + 0x00, 0x03, 0x40, 0x20, 0x04, 0x28, 0x02, 0x00, 0x20, 0x00, 0x73, 0x22, + 0x02, 0x41, 0x7f, 0x73, 0x20, 0x02, 0x41, 0xff, 0xfd, 0xfb, 0x77, 0x6a, + 0x71, 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x71, 0x0d, 0x02, 0x20, 0x04, + 0x41, 0x04, 0x6a, 0x21, 0x04, 0x20, 0x05, 0x41, 0x7c, 0x6a, 0x22, 0x05, + 0x41, 0x03, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x05, 0x45, 0x0d, 0x01, + 0x0b, 0x20, 0x01, 0x41, 0xff, 0x01, 0x71, 0x21, 0x02, 0x03, 0x40, 0x02, + 0x40, 0x20, 0x04, 0x2d, 0x00, 0x00, 0x20, 0x02, 0x47, 0x0d, 0x00, 0x20, + 0x04, 0x0f, 0x0b, 0x20, 0x04, 0x41, 0x01, 0x6a, 0x21, 0x04, 0x20, 0x05, + 0x41, 0x7f, 0x6a, 0x22, 0x05, 0x0d, 0x00, 0x0b, 0x0b, 0x41, 0x00, 0x0b, + 0x67, 0x01, 0x02, 0x7f, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x21, 0x02, 0x02, + 0x40, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x22, 0x03, 0x45, 0x0d, 0x00, 0x20, + 0x03, 0x20, 0x02, 0x41, 0xff, 0x01, 0x71, 0x47, 0x0d, 0x00, 0x20, 0x00, + 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, + 0x03, 0x40, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x21, 0x02, 0x20, 0x00, 0x2d, + 0x00, 0x00, 0x22, 0x03, 0x45, 0x0d, 0x01, 0x20, 0x00, 0x41, 0x01, 0x6a, + 0x21, 0x00, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, 0x03, 0x20, + 0x02, 0x41, 0xff, 0x01, 0x71, 0x46, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x03, + 0x20, 0x02, 0x41, 0xff, 0x01, 0x71, 0x6b, 0x0b, 0x0c, 0x00, 0x20, 0x00, + 0x41, 0x00, 0x10, 0x9b, 0x80, 0x80, 0x80, 0x00, 0x0b, 0xbc, 0x02, 0x01, + 0x06, 0x7f, 0x02, 0x40, 0x20, 0x00, 0x41, 0xff, 0xff, 0x07, 0x4b, 0x0d, + 0x00, 0x20, 0x00, 0x20, 0x00, 0x41, 0xff, 0x01, 0x71, 0x22, 0x02, 0x41, + 0x03, 0x6e, 0x22, 0x03, 0x41, 0x03, 0x6c, 0x6b, 0x41, 0xff, 0x01, 0x71, + 0x41, 0x02, 0x74, 0x41, 0xc0, 0x9e, 0x84, 0x80, 0x00, 0x6a, 0x28, 0x02, + 0x00, 0x20, 0x00, 0x41, 0x08, 0x76, 0x22, 0x04, 0x41, 0xa0, 0xa9, 0x84, + 0x80, 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x41, 0xd6, 0x00, 0x6c, 0x20, 0x03, + 0x6a, 0x41, 0xa0, 0xa9, 0x84, 0x80, 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x6c, + 0x41, 0x0b, 0x76, 0x41, 0x06, 0x70, 0x20, 0x04, 0x41, 0x90, 0xbe, 0x84, + 0x80, 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x6a, 0x41, 0x02, 0x74, 0x41, 0xd0, + 0x9e, 0x84, 0x80, 0x00, 0x6a, 0x28, 0x02, 0x00, 0x22, 0x03, 0x41, 0x08, + 0x75, 0x21, 0x04, 0x02, 0x40, 0x20, 0x03, 0x41, 0xff, 0x01, 0x71, 0x22, + 0x03, 0x41, 0x01, 0x4b, 0x0d, 0x00, 0x20, 0x04, 0x41, 0x00, 0x20, 0x03, + 0x20, 0x01, 0x73, 0x6b, 0x71, 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x20, 0x04, + 0x41, 0xff, 0x01, 0x71, 0x22, 0x03, 0x45, 0x0d, 0x00, 0x20, 0x04, 0x41, + 0x08, 0x76, 0x21, 0x04, 0x03, 0x40, 0x02, 0x40, 0x20, 0x02, 0x20, 0x03, + 0x41, 0x01, 0x76, 0x22, 0x05, 0x20, 0x04, 0x6a, 0x22, 0x06, 0x41, 0x01, + 0x74, 0x41, 0x90, 0xa6, 0x84, 0x80, 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x22, + 0x07, 0x47, 0x0d, 0x00, 0x02, 0x40, 0x20, 0x06, 0x41, 0x01, 0x74, 0x41, + 0x91, 0xa6, 0x84, 0x80, 0x00, 0x6a, 0x2d, 0x00, 0x00, 0x41, 0x02, 0x74, + 0x41, 0xd0, 0x9e, 0x84, 0x80, 0x00, 0x6a, 0x28, 0x02, 0x00, 0x22, 0x03, + 0x41, 0xff, 0x01, 0x71, 0x22, 0x04, 0x41, 0x01, 0x4b, 0x0d, 0x00, 0x20, + 0x03, 0x41, 0x08, 0x75, 0x41, 0x00, 0x20, 0x04, 0x20, 0x01, 0x73, 0x6b, + 0x71, 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x41, 0x7f, 0x41, 0x01, 0x20, 0x01, + 0x1b, 0x20, 0x00, 0x6a, 0x0f, 0x0b, 0x20, 0x04, 0x20, 0x06, 0x20, 0x02, + 0x20, 0x07, 0x49, 0x22, 0x07, 0x1b, 0x21, 0x04, 0x20, 0x05, 0x20, 0x03, + 0x20, 0x05, 0x6b, 0x20, 0x07, 0x1b, 0x22, 0x03, 0x0d, 0x00, 0x0b, 0x0b, + 0x20, 0x00, 0x0b, 0x0c, 0x00, 0x20, 0x00, 0x41, 0x01, 
0x10, 0x9b, 0x80, + 0x80, 0x80, 0x00, 0x0b, 0x7b, 0x01, 0x02, 0x7f, 0x02, 0x40, 0x20, 0x02, + 0x0d, 0x00, 0x41, 0x00, 0x0f, 0x0b, 0x02, 0x40, 0x02, 0x40, 0x20, 0x00, + 0x2d, 0x00, 0x00, 0x22, 0x03, 0x45, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x01, + 0x6a, 0x21, 0x00, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x21, 0x02, 0x03, 0x40, + 0x20, 0x03, 0x41, 0xff, 0x01, 0x71, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x22, + 0x04, 0x47, 0x0d, 0x02, 0x20, 0x04, 0x45, 0x0d, 0x02, 0x20, 0x02, 0x41, + 0x00, 0x46, 0x0d, 0x02, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x21, 0x02, 0x20, + 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, 0x00, 0x2d, 0x00, 0x00, 0x21, + 0x03, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x00, 0x20, 0x03, 0x0d, 0x00, + 0x0b, 0x0b, 0x41, 0x00, 0x21, 0x03, 0x0b, 0x20, 0x03, 0x41, 0xff, 0x01, + 0x71, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x6b, 0x0b, 0x0d, 0x00, 0x20, 0x00, + 0x10, 0x9a, 0x80, 0x80, 0x80, 0x00, 0x20, 0x00, 0x47, 0x0b, 0xbf, 0x09, + 0x01, 0x04, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x20, 0x02, 0x41, + 0x21, 0x4f, 0x0d, 0x00, 0x20, 0x00, 0x20, 0x01, 0x46, 0x0d, 0x02, 0x20, + 0x01, 0x20, 0x00, 0x20, 0x02, 0x6a, 0x22, 0x03, 0x6b, 0x41, 0x00, 0x20, + 0x02, 0x41, 0x01, 0x74, 0x6b, 0x4b, 0x0d, 0x01, 0x0b, 0x20, 0x00, 0x20, + 0x01, 0x20, 0x02, 0xfc, 0x0a, 0x00, 0x00, 0x0c, 0x01, 0x0b, 0x20, 0x01, + 0x20, 0x00, 0x73, 0x41, 0x03, 0x71, 0x21, 0x04, 0x02, 0x40, 0x02, 0x40, + 0x02, 0x40, 0x20, 0x00, 0x20, 0x01, 0x4f, 0x0d, 0x00, 0x02, 0x40, 0x20, + 0x04, 0x45, 0x0d, 0x00, 0x20, 0x02, 0x21, 0x05, 0x20, 0x00, 0x21, 0x03, + 0x0c, 0x03, 0x0b, 0x02, 0x40, 0x20, 0x00, 0x41, 0x03, 0x71, 0x0d, 0x00, + 0x20, 0x02, 0x21, 0x05, 0x20, 0x00, 0x21, 0x03, 0x0c, 0x02, 0x0b, 0x20, + 0x02, 0x45, 0x0d, 0x03, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, + 0x00, 0x00, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x21, 0x05, 0x02, 0x40, 0x20, + 0x00, 0x41, 0x01, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x0d, 0x00, 0x20, + 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x0c, 0x02, 0x0b, 0x20, 0x05, 0x45, + 0x0d, 0x03, 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x01, 0x3a, 0x00, 0x01, + 0x20, 0x02, 0x41, 0x7e, 0x6a, 0x21, 0x05, 0x02, 0x40, 0x20, 0x00, 0x41, + 0x02, 0x6a, 0x22, 0x03, 0x41, 0x03, 0x71, 0x0d, 0x00, 0x20, 0x01, 0x41, + 0x02, 0x6a, 0x21, 0x01, 0x0c, 0x02, 0x0b, 0x20, 0x05, 0x45, 0x0d, 0x03, + 0x20, 0x00, 0x20, 0x01, 0x2d, 0x00, 0x02, 0x3a, 0x00, 0x02, 0x20, 0x02, + 0x41, 0x7d, 0x6a, 0x21, 0x05, 0x02, 0x40, 0x20, 0x00, 0x41, 0x03, 0x6a, + 0x22, 0x03, 0x41, 0x03, 0x71, 0x0d, 0x00, 0x20, 0x01, 0x41, 0x03, 0x6a, + 0x21, 0x01, 0x0c, 0x02, 0x0b, 0x20, 0x05, 0x45, 0x0d, 0x03, 0x20, 0x00, + 0x20, 0x01, 0x2d, 0x00, 0x03, 0x3a, 0x00, 0x03, 0x20, 0x00, 0x41, 0x04, + 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x21, 0x01, 0x20, 0x02, + 0x41, 0x7c, 0x6a, 0x21, 0x05, 0x0c, 0x01, 0x0b, 0x02, 0x40, 0x20, 0x04, + 0x0d, 0x00, 0x02, 0x40, 0x20, 0x03, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, + 0x20, 0x02, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, 0x7f, 0x6a, + 0x22, 0x03, 0x6a, 0x22, 0x04, 0x20, 0x01, 0x20, 0x03, 0x6a, 0x2d, 0x00, + 0x00, 0x3a, 0x00, 0x00, 0x02, 0x40, 0x20, 0x04, 0x41, 0x03, 0x71, 0x0d, + 0x00, 0x20, 0x03, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x03, 0x45, 0x0d, + 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, 0x7e, 0x6a, 0x22, 0x03, 0x6a, 0x22, + 0x04, 0x20, 0x01, 0x20, 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, + 0x02, 0x40, 0x20, 0x04, 0x41, 0x03, 0x71, 0x0d, 0x00, 0x20, 0x03, 0x21, + 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x03, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, + 0x02, 0x41, 0x7d, 0x6a, 0x22, 0x03, 0x6a, 0x22, 0x04, 0x20, 0x01, 0x20, + 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x02, 
0x40, 0x20, 0x04, + 0x41, 0x03, 0x71, 0x0d, 0x00, 0x20, 0x03, 0x21, 0x02, 0x0c, 0x01, 0x0b, + 0x20, 0x03, 0x45, 0x0d, 0x04, 0x20, 0x00, 0x20, 0x02, 0x41, 0x7c, 0x6a, + 0x22, 0x02, 0x6a, 0x20, 0x01, 0x20, 0x02, 0x6a, 0x2d, 0x00, 0x00, 0x3a, + 0x00, 0x00, 0x0b, 0x20, 0x02, 0x41, 0x04, 0x49, 0x0d, 0x00, 0x02, 0x40, + 0x20, 0x02, 0x41, 0x7c, 0x6a, 0x22, 0x06, 0x41, 0x02, 0x76, 0x41, 0x01, + 0x6a, 0x41, 0x03, 0x71, 0x22, 0x03, 0x45, 0x0d, 0x00, 0x20, 0x01, 0x41, + 0x7c, 0x6a, 0x21, 0x04, 0x20, 0x00, 0x41, 0x7c, 0x6a, 0x21, 0x05, 0x03, + 0x40, 0x20, 0x05, 0x20, 0x02, 0x6a, 0x20, 0x04, 0x20, 0x02, 0x6a, 0x28, + 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x7c, 0x6a, 0x21, 0x02, + 0x20, 0x03, 0x41, 0x7f, 0x6a, 0x22, 0x03, 0x0d, 0x00, 0x0b, 0x0b, 0x20, + 0x06, 0x41, 0x0c, 0x49, 0x0d, 0x00, 0x20, 0x01, 0x41, 0x70, 0x6a, 0x21, + 0x05, 0x20, 0x00, 0x41, 0x70, 0x6a, 0x21, 0x06, 0x03, 0x40, 0x20, 0x06, + 0x20, 0x02, 0x6a, 0x22, 0x03, 0x41, 0x0c, 0x6a, 0x20, 0x05, 0x20, 0x02, + 0x6a, 0x22, 0x04, 0x41, 0x0c, 0x6a, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, + 0x20, 0x03, 0x41, 0x08, 0x6a, 0x20, 0x04, 0x41, 0x08, 0x6a, 0x28, 0x02, + 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x20, 0x04, 0x41, + 0x04, 0x6a, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x20, 0x04, + 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x02, 0x41, 0x70, 0x6a, 0x22, + 0x02, 0x41, 0x03, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x02, 0x45, 0x0d, + 0x02, 0x20, 0x02, 0x21, 0x03, 0x02, 0x40, 0x20, 0x02, 0x41, 0x03, 0x71, + 0x22, 0x04, 0x45, 0x0d, 0x00, 0x20, 0x01, 0x41, 0x7f, 0x6a, 0x21, 0x05, + 0x20, 0x00, 0x41, 0x7f, 0x6a, 0x21, 0x06, 0x20, 0x02, 0x21, 0x03, 0x03, + 0x40, 0x20, 0x06, 0x20, 0x03, 0x6a, 0x20, 0x05, 0x20, 0x03, 0x6a, 0x2d, + 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x7f, 0x6a, 0x21, 0x03, + 0x20, 0x04, 0x41, 0x7f, 0x6a, 0x22, 0x04, 0x0d, 0x00, 0x0b, 0x0b, 0x20, + 0x02, 0x41, 0x04, 0x49, 0x0d, 0x02, 0x20, 0x01, 0x41, 0x7c, 0x6a, 0x21, + 0x04, 0x20, 0x00, 0x41, 0x7c, 0x6a, 0x21, 0x05, 0x03, 0x40, 0x20, 0x05, + 0x20, 0x03, 0x6a, 0x22, 0x01, 0x41, 0x03, 0x6a, 0x20, 0x04, 0x20, 0x03, + 0x6a, 0x22, 0x02, 0x41, 0x03, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, + 0x20, 0x01, 0x41, 0x02, 0x6a, 0x20, 0x02, 0x41, 0x02, 0x6a, 0x2d, 0x00, + 0x00, 0x3a, 0x00, 0x00, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x20, 0x02, 0x41, + 0x01, 0x6a, 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x01, 0x20, 0x02, + 0x2d, 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x7c, 0x6a, 0x22, + 0x03, 0x0d, 0x00, 0x0c, 0x03, 0x0b, 0x0b, 0x20, 0x05, 0x41, 0x04, 0x49, + 0x0d, 0x00, 0x02, 0x40, 0x20, 0x05, 0x41, 0x7c, 0x6a, 0x22, 0x04, 0x41, + 0x02, 0x76, 0x41, 0x01, 0x6a, 0x41, 0x07, 0x71, 0x22, 0x02, 0x45, 0x0d, + 0x00, 0x20, 0x05, 0x20, 0x02, 0x41, 0x02, 0x74, 0x6b, 0x21, 0x05, 0x03, + 0x40, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, + 0x01, 0x41, 0x04, 0x6a, 0x21, 0x01, 0x20, 0x03, 0x41, 0x04, 0x6a, 0x21, + 0x03, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x22, 0x02, 0x0d, 0x00, 0x0b, 0x0b, + 0x20, 0x04, 0x41, 0x1c, 0x49, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x03, 0x20, + 0x01, 0x28, 0x02, 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x20, 0x01, 0x28, + 0x02, 0x04, 0x36, 0x02, 0x04, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x08, + 0x36, 0x02, 0x08, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x0c, 0x36, 0x02, + 0x0c, 0x20, 0x03, 0x20, 0x01, 0x28, 0x02, 0x10, 0x36, 0x02, 0x10, 0x20, + 0x03, 0x20, 0x01, 0x28, 0x02, 0x14, 0x36, 0x02, 0x14, 0x20, 0x03, 0x20, + 0x01, 0x28, 0x02, 0x18, 0x36, 0x02, 0x18, 0x20, 0x03, 0x20, 0x01, 0x28, + 0x02, 0x1c, 0x36, 0x02, 0x1c, 0x20, 0x01, 0x41, 0x20, 
0x6a, 0x21, 0x01, + 0x20, 0x03, 0x41, 0x20, 0x6a, 0x21, 0x03, 0x20, 0x05, 0x41, 0x60, 0x6a, + 0x22, 0x05, 0x41, 0x03, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x05, 0x45, + 0x0d, 0x00, 0x02, 0x40, 0x02, 0x40, 0x20, 0x05, 0x41, 0x07, 0x71, 0x22, + 0x02, 0x0d, 0x00, 0x20, 0x05, 0x21, 0x04, 0x0c, 0x01, 0x0b, 0x20, 0x05, + 0x41, 0x78, 0x71, 0x21, 0x04, 0x03, 0x40, 0x20, 0x03, 0x20, 0x01, 0x2d, + 0x00, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x03, 0x41, 0x01, 0x6a, 0x21, 0x03, + 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, 0x02, 0x41, 0x7f, 0x6a, + 0x22, 0x02, 0x0d, 0x00, 0x0b, 0x0b, 0x20, 0x05, 0x41, 0x08, 0x49, 0x0d, + 0x00, 0x03, 0x40, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x00, 0x3a, 0x00, + 0x00, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x01, 0x3a, 0x00, 0x01, 0x20, + 0x03, 0x20, 0x01, 0x2d, 0x00, 0x02, 0x3a, 0x00, 0x02, 0x20, 0x03, 0x20, + 0x01, 0x2d, 0x00, 0x03, 0x3a, 0x00, 0x03, 0x20, 0x03, 0x20, 0x01, 0x2d, + 0x00, 0x04, 0x3a, 0x00, 0x04, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x05, + 0x3a, 0x00, 0x05, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x06, 0x3a, 0x00, + 0x06, 0x20, 0x03, 0x20, 0x01, 0x2d, 0x00, 0x07, 0x3a, 0x00, 0x07, 0x20, + 0x03, 0x41, 0x08, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x08, 0x6a, 0x21, + 0x01, 0x20, 0x04, 0x41, 0x78, 0x6a, 0x22, 0x04, 0x0d, 0x00, 0x0b, 0x0b, + 0x20, 0x00, 0x0b, 0x0d, 0x00, 0x20, 0x00, 0x10, 0x9c, 0x80, 0x80, 0x80, + 0x00, 0x20, 0x00, 0x47, 0x0b, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x20, 0x46, + 0x20, 0x00, 0x41, 0x09, 0x46, 0x72, 0x0b, 0x0a, 0x00, 0x20, 0x00, 0x10, + 0xa1, 0x80, 0x80, 0x80, 0x00, 0x0b, 0x0a, 0x00, 0x20, 0x00, 0x41, 0x50, + 0x6a, 0x41, 0x0a, 0x49, 0x0b, 0x4d, 0x01, 0x02, 0x7f, 0x20, 0x00, 0x20, + 0x00, 0x10, 0x95, 0x80, 0x80, 0x80, 0x00, 0x6a, 0x21, 0x03, 0x02, 0x40, + 0x20, 0x02, 0x45, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x01, 0x2d, 0x00, 0x00, + 0x22, 0x04, 0x45, 0x0d, 0x01, 0x20, 0x03, 0x20, 0x04, 0x3a, 0x00, 0x00, + 0x20, 0x03, 0x41, 0x01, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x01, 0x6a, + 0x21, 0x01, 0x20, 0x02, 0x41, 0x7f, 0x6a, 0x22, 0x02, 0x0d, 0x00, 0x0b, + 0x0b, 0x20, 0x03, 0x41, 0x00, 0x3a, 0x00, 0x00, 0x20, 0x00, 0x0b, 0xef, + 0x03, 0x01, 0x04, 0x7f, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, 0x02, 0x40, + 0x02, 0x40, 0x20, 0x01, 0x20, 0x00, 0x73, 0x41, 0x03, 0x71, 0x45, 0x0d, + 0x00, 0x20, 0x00, 0x21, 0x03, 0x0c, 0x01, 0x0b, 0x20, 0x02, 0x41, 0x00, + 0x47, 0x21, 0x04, 0x02, 0x40, 0x02, 0x40, 0x20, 0x01, 0x41, 0x03, 0x71, + 0x0d, 0x00, 0x20, 0x00, 0x21, 0x03, 0x0c, 0x01, 0x0b, 0x02, 0x40, 0x20, + 0x02, 0x0d, 0x00, 0x20, 0x00, 0x21, 0x03, 0x0c, 0x01, 0x0b, 0x20, 0x00, + 0x20, 0x01, 0x2d, 0x00, 0x00, 0x22, 0x03, 0x3a, 0x00, 0x00, 0x02, 0x40, + 0x20, 0x03, 0x0d, 0x00, 0x20, 0x00, 0x21, 0x03, 0x20, 0x02, 0x21, 0x05, + 0x0c, 0x05, 0x0b, 0x20, 0x00, 0x41, 0x01, 0x6a, 0x21, 0x03, 0x20, 0x02, + 0x41, 0x7f, 0x6a, 0x22, 0x05, 0x41, 0x00, 0x47, 0x21, 0x04, 0x02, 0x40, + 0x20, 0x01, 0x41, 0x01, 0x6a, 0x22, 0x06, 0x41, 0x03, 0x71, 0x45, 0x0d, + 0x00, 0x20, 0x05, 0x45, 0x0d, 0x00, 0x20, 0x03, 0x20, 0x06, 0x2d, 0x00, + 0x00, 0x22, 0x04, 0x3a, 0x00, 0x00, 0x20, 0x04, 0x45, 0x0d, 0x05, 0x20, + 0x00, 0x41, 0x02, 0x6a, 0x21, 0x03, 0x20, 0x02, 0x41, 0x7e, 0x6a, 0x22, + 0x05, 0x41, 0x00, 0x47, 0x21, 0x04, 0x02, 0x40, 0x20, 0x01, 0x41, 0x02, + 0x6a, 0x22, 0x06, 0x41, 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x05, 0x45, + 0x0d, 0x00, 0x20, 0x03, 0x20, 0x06, 0x2d, 0x00, 0x00, 0x22, 0x04, 0x3a, + 0x00, 0x00, 0x20, 0x04, 0x45, 0x0d, 0x06, 0x20, 0x00, 0x41, 0x03, 0x6a, + 0x21, 0x03, 0x20, 0x02, 0x41, 0x7d, 0x6a, 0x22, 0x05, 0x41, 0x00, 0x47, + 0x21, 0x04, 0x02, 0x40, 0x20, 0x01, 0x41, 0x03, 0x6a, 
0x22, 0x06, 0x41, + 0x03, 0x71, 0x45, 0x0d, 0x00, 0x20, 0x05, 0x45, 0x0d, 0x00, 0x20, 0x03, + 0x20, 0x06, 0x2d, 0x00, 0x00, 0x22, 0x04, 0x3a, 0x00, 0x00, 0x20, 0x04, + 0x45, 0x0d, 0x07, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x21, 0x03, 0x20, 0x01, + 0x41, 0x04, 0x6a, 0x21, 0x01, 0x20, 0x02, 0x41, 0x7c, 0x6a, 0x22, 0x02, + 0x41, 0x00, 0x47, 0x21, 0x04, 0x0c, 0x03, 0x0b, 0x20, 0x06, 0x21, 0x01, + 0x20, 0x05, 0x21, 0x02, 0x0c, 0x02, 0x0b, 0x20, 0x06, 0x21, 0x01, 0x20, + 0x05, 0x21, 0x02, 0x0c, 0x01, 0x0b, 0x20, 0x06, 0x21, 0x01, 0x20, 0x05, + 0x21, 0x02, 0x0b, 0x20, 0x04, 0x45, 0x0d, 0x02, 0x02, 0x40, 0x20, 0x01, + 0x2d, 0x00, 0x00, 0x0d, 0x00, 0x20, 0x02, 0x21, 0x05, 0x0c, 0x04, 0x0b, + 0x20, 0x02, 0x41, 0x04, 0x49, 0x0d, 0x00, 0x03, 0x40, 0x20, 0x01, 0x28, + 0x02, 0x00, 0x22, 0x00, 0x41, 0x7f, 0x73, 0x20, 0x00, 0x41, 0xff, 0xfd, + 0xfb, 0x77, 0x6a, 0x71, 0x41, 0x80, 0x81, 0x82, 0x84, 0x78, 0x71, 0x0d, + 0x02, 0x20, 0x03, 0x20, 0x00, 0x36, 0x02, 0x00, 0x20, 0x03, 0x41, 0x04, + 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x21, 0x01, 0x20, 0x02, + 0x41, 0x7c, 0x6a, 0x22, 0x02, 0x41, 0x03, 0x4b, 0x0d, 0x00, 0x0b, 0x0b, + 0x20, 0x02, 0x45, 0x0d, 0x01, 0x0b, 0x03, 0x40, 0x20, 0x03, 0x20, 0x01, + 0x2d, 0x00, 0x00, 0x22, 0x00, 0x3a, 0x00, 0x00, 0x02, 0x40, 0x20, 0x00, + 0x0d, 0x00, 0x20, 0x02, 0x21, 0x05, 0x0c, 0x03, 0x0b, 0x20, 0x03, 0x41, + 0x01, 0x6a, 0x21, 0x03, 0x20, 0x01, 0x41, 0x01, 0x6a, 0x21, 0x01, 0x20, + 0x02, 0x41, 0x7f, 0x6a, 0x22, 0x02, 0x0d, 0x00, 0x0b, 0x0b, 0x41, 0x00, + 0x21, 0x05, 0x0b, 0x20, 0x03, 0x41, 0x00, 0x20, 0x05, 0x10, 0x94, 0x80, + 0x80, 0x80, 0x00, 0x0b, 0x11, 0x00, 0x20, 0x00, 0x20, 0x01, 0x20, 0x02, + 0x10, 0xa5, 0x80, 0x80, 0x80, 0x00, 0x1a, 0x20, 0x00, 0x0b, 0x17, 0x00, + 0x20, 0x00, 0x41, 0x50, 0x6a, 0x41, 0x0a, 0x49, 0x20, 0x00, 0x41, 0x20, + 0x72, 0x41, 0x9f, 0x7f, 0x6a, 0x41, 0x06, 0x49, 0x72, 0x0b, 0x2a, 0x01, + 0x03, 0x7f, 0x41, 0x00, 0x21, 0x01, 0x03, 0x40, 0x20, 0x00, 0x20, 0x01, + 0x6a, 0x21, 0x02, 0x20, 0x01, 0x41, 0x04, 0x6a, 0x22, 0x03, 0x21, 0x01, + 0x20, 0x02, 0x28, 0x02, 0x00, 0x0d, 0x00, 0x0b, 0x20, 0x03, 0x41, 0x7c, + 0x6a, 0x41, 0x02, 0x75, 0x0b, 0x45, 0x01, 0x01, 0x7f, 0x02, 0x40, 0x20, + 0x01, 0x45, 0x0d, 0x00, 0x20, 0x00, 0x41, 0x7c, 0x6a, 0x21, 0x00, 0x02, + 0x40, 0x03, 0x40, 0x20, 0x00, 0x41, 0x04, 0x6a, 0x22, 0x00, 0x28, 0x02, + 0x00, 0x22, 0x02, 0x45, 0x0d, 0x01, 0x20, 0x02, 0x20, 0x01, 0x47, 0x0d, + 0x00, 0x0b, 0x0b, 0x20, 0x00, 0x41, 0x00, 0x20, 0x02, 0x1b, 0x0f, 0x0b, + 0x20, 0x00, 0x20, 0x00, 0x10, 0xa8, 0x80, 0x80, 0x80, 0x00, 0x41, 0x02, + 0x74, 0x6a, 0x0b, 0x1d, 0x00, 0x02, 0x40, 0x20, 0x00, 0x0d, 0x00, 0x41, + 0x00, 0x0f, 0x0b, 0x41, 0x90, 0xc2, 0x84, 0x80, 0x00, 0x20, 0x00, 0x10, + 0xa9, 0x80, 0x80, 0x80, 0x00, 0x41, 0x00, 0x47, 0x0b, 0x24, 0x01, 0x01, + 0x7f, 0x41, 0x01, 0x21, 0x01, 0x02, 0x40, 0x20, 0x00, 0x41, 0x50, 0x6a, + 0x41, 0x0a, 0x49, 0x0d, 0x00, 0x20, 0x00, 0x10, 0x96, 0x80, 0x80, 0x80, + 0x00, 0x41, 0x00, 0x47, 0x21, 0x01, 0x0b, 0x20, 0x01, 0x0b, 0x0b, 0xf1, + 0x42, 0x01, 0x00, 0x41, 0x80, 0x80, 0x04, 0x0b, 0xe8, 0x42, 0x12, 0x11, + 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, + 0x1f, 0x20, 0x21, 0x11, 0x22, 0x23, 0x24, 0x11, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x2b, 0x2c, 0x11, 0x2d, 0x2e, 0x2f, 0x10, 0x10, 0x30, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x31, 0x32, 0x33, 0x10, 0x34, 0x35, + 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x36, 0x11, 0x11, 0x11, 0x11, 0x11, 
0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x37, 0x11, 0x11, 0x11, 0x11, 0x38, 0x11, 0x39, 0x3a, 0x3b, 0x3c, + 0x3d, 0x3e, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3f, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x40, 0x41, 0x11, 0x42, + 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x11, 0x4b, 0x4c, 0x4d, + 0x4e, 0x4f, 0x50, 0x51, 0x10, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x10, 0x5e, 0x5f, 0x60, 0x10, 0x11, 0x11, + 0x11, 0x61, 0x62, 0x63, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x11, 0x11, 0x11, 0x11, 0x64, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, + 0x65, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, + 0x66, 0x67, 0x10, 0x10, 0x68, 0x69, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x11, 0x11, 0x11, 0x6a, 0x11, 0x11, 0x6b, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x6c, + 0x6d, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x6e, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x6f, 0x70, + 0x71, 0x72, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x73, 0x74, + 0x75, 0x10, 0x10, 0x10, 0x10, 0x10, 0x76, 0x77, 0x10, 0x10, 0x10, 0x10, + 0x78, 0x10, 0x10, 0x79, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, + 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x20, 0x04, 0xff, 0xff, + 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0xff, 0x03, 0x00, 0x1f, 0x50, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xdf, 0xbc, 0x40, 0xd7, 0xff, 
0xff, 0xfb, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x03, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xfe, 0xff, 0xff, 0xff, 0x7f, 0x02, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xbf, 0xb6, 0x00, 0xff, 0xff, 0xff, 0x87, + 0x07, 0x00, 0x00, 0x00, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xfe, 0xff, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xef, 0x1f, 0xfe, 0xe1, 0xff, 0x9f, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xe0, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x07, 0x30, 0x04, 0xff, 0xff, 0xff, 0xfc, 0xff, 0x1f, + 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xdf, 0x3f, 0x00, 0x00, 0xf0, 0xff, 0xf8, 0x03, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0xff, 0xdf, + 0xe1, 0xff, 0xcf, 0xff, 0xfe, 0xff, 0xef, 0x9f, 0xf9, 0xff, 0xff, 0xfd, + 0xc5, 0xe3, 0x9f, 0x59, 0x80, 0xb0, 0xcf, 0xff, 0x03, 0x10, 0xee, 0x87, + 0xf9, 0xff, 0xff, 0xfd, 0x6d, 0xc3, 0x87, 0x19, 0x02, 0x5e, 0xc0, 0xff, + 0x3f, 0x00, 0xee, 0xbf, 0xfb, 0xff, 0xff, 0xfd, 0xed, 0xe3, 0xbf, 0x1b, + 0x01, 0x00, 0xcf, 0xff, 0x00, 0x1e, 0xee, 0x9f, 0xf9, 0xff, 0xff, 0xfd, + 0xed, 0xe3, 0x9f, 0x19, 0xc0, 0xb0, 0xcf, 0xff, 0x02, 0x00, 0xec, 0xc7, + 0x3d, 0xd6, 0x18, 0xc7, 0xff, 0xc3, 0xc7, 0x1d, 0x81, 0x00, 0xc0, 0xff, + 0x00, 0x00, 0xef, 0xdf, 0xfd, 0xff, 0xff, 0xfd, 0xff, 0xe3, 0xdf, 0x1d, + 0x60, 0x07, 0xcf, 0xff, 0x00, 0x00, 0xef, 0xdf, 0xfd, 0xff, 0xff, 0xfd, + 0xef, 0xe3, 0xdf, 0x1d, 0x60, 0x40, 0xcf, 0xff, 0x06, 0x00, 0xef, 0xdf, + 0xfd, 0xff, 0xff, 0xff, 0xff, 0xe7, 0xdf, 0x5d, 0xf0, 0x80, 0xcf, 0xff, + 0x00, 0xfc, 0xec, 0xff, 0x7f, 0xfc, 0xff, 0xff, 0xfb, 0x2f, 0x7f, 0x80, + 0x5f, 0xff, 0xc0, 0xff, 0x0c, 0x00, 0xfe, 0xff, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0x07, 0x3f, 0x20, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0xd6, 0xf7, + 0xff, 0xff, 0xaf, 0xff, 0xff, 0x3b, 0x5f, 0x20, 0xff, 0xf3, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x03, 0x00, 0x00, 0xff, 0xfe, + 0xff, 0xff, 0xff, 0x1f, 0xfe, 0xff, 0x03, 0xff, 0xff, 0xfe, 0xff, 0xff, + 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x7f, 0xf9, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0xbf, 0x20, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x3d, 0x7f, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3d, + 0xff, 0xff, 0xff, 0xff, 0x3d, 0x7f, 0x3d, 0xff, 0x7f, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x3f, 0xfe, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9f, 0xff, 0xff, 0xfe, 0xff, + 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7, + 0xff, 0x01, 0xff, 0xdf, 0x0f, 0x00, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, + 0x0f, 0x00, 0xff, 0xdf, 0x0d, 0x00, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, + 0xcf, 0xff, 0xff, 0x01, 0x80, 0x10, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0xff, 0x7f, 0xff, 0x0f, + 0xff, 0x01, 0xc0, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x1f, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0x03, 0xff, 0x03, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x7f, 0xfe, 0xff, 0x1f, 0x00, 0xff, 0x03, 0xff, 0x03, 0x80, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xef, 0xff, 0xef, 0x0f, 0xff, 0x03, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xbf, 0xff, 0x03, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0x00, 0xff, 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0x01, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xe7, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, + 0x6f, 0x04, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0x1f, 0x00, 0xff, 0xff, + 0x3f, 0x3f, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x3f, 0xff, 0xaa, 0xff, 0xff, + 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdf, 0x5f, 0xdc, 0x1f, + 0xcf, 0x0f, 0xff, 0x1f, 0xdc, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, + 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x84, 0xfc, 0x2f, 0x3e, 0x50, 0xbd, 0xff, 0xf3, 0xe0, 0x43, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x78, + 0x0c, 0x00, 0xff, 0xff, 0xff, 0xff, 0xbf, 0x20, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x80, 0x00, 0x00, 0xff, 0xff, 0x7f, 0x00, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0xfe, 0x03, + 0x3e, 0x1f, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0xe0, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xf7, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 0xff, 0xff, + 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0x1f, + 0xff, 0xff, 0xff, 0x0f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, + 0xf0, 0x8f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x80, 0xff, 0xfc, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf9, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0xff, 0xbf, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x2f, 0x00, 0xff, 0x03, 0x00, 0x00, 0xfc, 0xe8, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, + 0xff, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf7, 0xff, 0x00, 0x80, + 0xff, 0x03, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0x00, 0xff, 0x3f, 0xff, 0x03, 0xff, 0xff, 0x7f, 0xfc, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x05, 0x00, 0x00, 0x38, 0xff, 0xff, + 0x3c, 0x00, 0x7e, 0x7e, 0x7e, 0x00, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xf7, 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0xff, 0x03, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0x00, 0xff, 0xff, 0x7f, 0xf8, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x7f, 0x00, 0xf8, 0xe0, 0xff, 0xfd, 0x7f, 0x5f, 0xdb, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x03, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x1f, 0x00, 0x00, 0xff, 0x03, 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, + 0xff, 0x07, 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x7f, 0xfc, 0xfc, 0xfc, 0x1c, 0x00, 0x00, 0x00, 0x00, 0xff, 0xef, + 0xff, 0xff, 0x7f, 0xff, 0xff, 0xb7, 0xff, 0x3f, 0xff, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x1f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0x00, 0xe0, 0xff, 0xff, 0xff, 0x07, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x07, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, + 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x3f, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0x0f, 0xff, 0xff, 0xff, + 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0xfd, 0xff, 0xff, 0xff, 0xff, + 0xbf, 0x91, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, + 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0x37, 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0xf0, + 0xef, 0xfe, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0x1f, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0xff, 0xfe, + 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3f, 0x00, 0xff, 0xff, 0x3f, 0x00, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x07, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x00, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x1f, 0x80, 0x00, + 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0x7f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0x00, + 0x00, 0x00, 0xc0, 0xff, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x01, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0xff, 0x03, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xc7, 0xff, 0x70, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x47, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e, 0x00, + 0xff, 0x17, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, + 0x9f, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xbd, + 0xff, 0xbf, 0xff, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, + 0xff, 0x03, 0xef, 0x9f, 0xf9, 0xff, 0xff, 0xfd, 0xed, 0xe3, 0x9f, 0x19, + 0x81, 0xe0, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbb, 0x07, 0xff, 0x83, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb3, 0x00, + 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x3f, 0x7f, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x11, 0x00, + 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x3f, 0x01, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xe7, 0xff, 0x07, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03, + 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0xff, 0xfc, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x1a, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xe7, 0x7f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, + 0x7f, 0x7f, 0x01, 0x00, 0xff, 0x03, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff, + 0xfc, 0xff, 0xff, 0xfe, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x7f, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xb4, 0xcb, 0x00, + 0xff, 0x03, 0xbf, 0xfd, 0xff, 0xff, 0xff, 0x7f, 0x7b, 0x01, 0xff, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0x7f, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x7f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 0xff, 0xff, 0xff, 0x7f, 0xff, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x3f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x0f, 0x00, 0xff, 0x03, 0xf8, 0xff, 0xff, 0xe0, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x87, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x80, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07, 0x00, 0xff, 0xff, + 0xff, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0xf0, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x07, 0xff, 0x1f, 0xff, 0x01, 0xff, 
0x43, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xdf, 0x64, 0xde, 0xff, 0xeb, 0xef, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xe7, 0xdf, 0xdf, 0xff, 0xff, + 0xff, 0x7b, 0x5f, 0xfc, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, + 0xff, 0xff, 0xfd, 0xff, 0xff, 0xf7, 0xff, 0xff, 0xff, 0xf7, 0xff, 0xff, + 0xdf, 0xff, 0xff, 0xff, 0xdf, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xf7, 0xcf, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xf9, 0xdb, 0x07, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x80, 0x3f, 0xff, 0x43, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x0f, 0xff, 0x03, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8f, 0x08, + 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xef, 0xff, + 0xff, 0xff, 0x96, 0xfe, 0xf7, 0x0a, 0x84, 0xea, 0x96, 0xaa, 0x96, 0xf7, + 0xf7, 0x5e, 0xff, 0xfb, 0xff, 0x0f, 0xee, 0xfb, 0xff, 0x0f, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x03, 0xff, 0xff, 0xff, 0x03, 0xff, 0xff, 0xff, 0x03, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x56, 0x01, 0x00, 0x00, 0x39, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, + 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0xbf, 0x1d, 0x00, 0x00, 0xe7, + 0x02, 0x00, 0x00, 0x79, 0x00, 0x00, 0x02, 0x24, 0x00, 0x00, 0x01, 0x01, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x01, 0x39, 0xff, 0xff, 0x00, 0x18, + 0xff, 0xff, 0x01, 0x87, 0xff, 0xff, 0x00, 0xd4, 0xfe, 0xff, 0x00, 0xc3, + 0x00, 0x00, 0x01, 0xd2, 0x00, 0x00, 0x01, 0xce, 0x00, 0x00, 0x01, 0xcd, + 0x00, 0x00, 0x01, 0x4f, 0x00, 0x00, 0x01, 0xca, 0x00, 0x00, 0x01, 0xcb, + 0x00, 0x00, 0x01, 0xcf, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x01, 0xd3, + 0x00, 0x00, 0x01, 0xd1, 0x00, 0x00, 0x00, 0xa3, 0x00, 0x00, 0x01, 0xd5, + 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x01, 0xd6, 0x00, 0x00, 0x01, 0xda, + 0x00, 0x00, 0x01, 0xd9, 0x00, 0x00, 0x01, 0xdb, 0x00, 0x00, 0x00, 0x38, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0xb1, 0xff, 0xff, 0x01, 0x9f, + 0xff, 0xff, 0x01, 0xc8, 0xff, 0xff, 0x02, 0x28, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x33, + 0xff, 0xff, 0x00, 0x26, 0xff, 0xff, 0x01, 0x7e, 0xff, 0xff, 0x01, 0x2b, + 0x2a, 0x00, 0x01, 0x5d, 0xff, 0xff, 0x01, 0x28, 0x2a, 
0x00, 0x00, 0x3f, + 0x2a, 0x00, 0x01, 0x3d, 0xff, 0xff, 0x01, 0x45, 0x00, 0x00, 0x01, 0x47, + 0x00, 0x00, 0x00, 0x1f, 0x2a, 0x00, 0x00, 0x1c, 0x2a, 0x00, 0x00, 0x1e, + 0x2a, 0x00, 0x00, 0x2e, 0xff, 0xff, 0x00, 0x32, 0xff, 0xff, 0x00, 0x36, + 0xff, 0xff, 0x00, 0x35, 0xff, 0xff, 0x00, 0x4f, 0xa5, 0x00, 0x00, 0x4b, + 0xa5, 0x00, 0x00, 0x31, 0xff, 0xff, 0x00, 0x28, 0xa5, 0x00, 0x00, 0x44, + 0xa5, 0x00, 0x00, 0x2f, 0xff, 0xff, 0x00, 0x2d, 0xff, 0xff, 0x00, 0xf7, + 0x29, 0x00, 0x00, 0x41, 0xa5, 0x00, 0x00, 0xfd, 0x29, 0x00, 0x00, 0x2b, + 0xff, 0xff, 0x00, 0x2a, 0xff, 0xff, 0x00, 0xe7, 0x29, 0x00, 0x00, 0x43, + 0xa5, 0x00, 0x00, 0x2a, 0xa5, 0x00, 0x00, 0xbb, 0xff, 0xff, 0x00, 0x27, + 0xff, 0xff, 0x00, 0xb9, 0xff, 0xff, 0x00, 0x25, 0xff, 0xff, 0x00, 0x15, + 0xa5, 0x00, 0x00, 0x12, 0xa5, 0x00, 0x02, 0x24, 0x4c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x01, 0x01, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x54, 0x00, 0x00, 0x01, 0x74, + 0x00, 0x00, 0x01, 0x26, 0x00, 0x00, 0x01, 0x25, 0x00, 0x00, 0x01, 0x40, + 0x00, 0x00, 0x01, 0x3f, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x00, 0xdb, + 0xff, 0xff, 0x00, 0xe1, 0xff, 0xff, 0x00, 0xc0, 0xff, 0xff, 0x00, 0xc1, + 0xff, 0xff, 0x01, 0x08, 0x00, 0x00, 0x00, 0xc2, 0xff, 0xff, 0x00, 0xc7, + 0xff, 0xff, 0x00, 0xd1, 0xff, 0xff, 0x00, 0xca, 0xff, 0xff, 0x00, 0xf8, + 0xff, 0xff, 0x00, 0xaa, 0xff, 0xff, 0x00, 0xb0, 0xff, 0xff, 0x00, 0x07, + 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0x01, 0xc4, 0xff, 0xff, 0x00, 0xa0, + 0xff, 0xff, 0x01, 0xf9, 0xff, 0xff, 0x02, 0x1a, 0x70, 0x00, 0x01, 0x01, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0x20, 0x00, 0x00, 0x00, 0xe0, + 0xff, 0xff, 0x01, 0x50, 0x00, 0x00, 0x01, 0x0f, 0x00, 0x00, 0x00, 0xf1, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x30, 0x00, 0x00, 0x00, 0xd0, + 0xff, 0xff, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x0b, 0x00, 0x01, 0x60, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0xd0, 0x97, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0xf8, + 0xff, 0xff, 0x02, 0x05, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, + 0xf4, 0xff, 0x00, 0x9e, 0xe7, 0xff, 0x00, 0xc2, 0x89, 0x00, 0x00, 0xdb, + 0xe7, 0xff, 0x00, 0x92, 0xe7, 0xff, 0x00, 0x93, 0xe7, 0xff, 0x00, 0x9c, + 0xe7, 0xff, 0x00, 0x9d, 0xe7, 0xff, 0x00, 0xa4, 0xe7, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x38, 0x8a, 0x00, 0x00, 0x04, 0x8a, 0x00, 0x00, 0xe6, + 0x0e, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc5, 0xff, 0xff, 0x01, 0x41, 0xe2, 0xff, 0x02, 0x1d, + 0x8f, 0x00, 0x00, 0x08, 0x00, 0x00, 0x01, 0xf8, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x01, 0xaa, 0xff, 0xff, 0x00, 0x4a, + 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x70, + 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x01, 0xb6, + 0xff, 0xff, 0x01, 0xf7, 0xff, 0xff, 0x00, 0xdb, 0xe3, 0xff, 0x01, 0x9c, + 0xff, 0xff, 0x01, 0x90, 0xff, 0xff, 0x01, 0x80, 0xff, 0xff, 0x01, 0x82, + 0xff, 0xff, 0x02, 0x05, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x10, + 0x00, 0x00, 0x00, 0xf0, 0xff, 0xff, 0x01, 0x1c, 0x00, 0x00, 0x01, 0x01, + 0x00, 0x00, 0x01, 0xa3, 0xe2, 0xff, 0x01, 0x41, 0xdf, 0xff, 0x01, 0xba, + 0xdf, 0xff, 0x00, 0xe4, 0xff, 0xff, 0x02, 0x0b, 0xb1, 0x00, 0x01, 0x01, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0x30, 0x00, 0x00, 0x00, 0xd0, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x09, 0xd6, 0xff, 0x01, 0x1a, + 0xf1, 0xff, 0x01, 0x19, 0xd6, 0xff, 0x00, 0xd5, 0xd5, 0xff, 0x00, 0xd8, + 0xd5, 0xff, 0x01, 0xe4, 0xd5, 0xff, 0x01, 0x03, 0xd6, 
0xff, 0x01, 0xe1, + 0xd5, 0xff, 0x01, 0xe2, 0xd5, 0xff, 0x01, 0xc1, 0xd5, 0xff, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xa0, 0xe3, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x02, 0x0c, 0xbc, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x01, 0xbc, + 0x5a, 0xff, 0x01, 0xa0, 0x03, 0x00, 0x01, 0xfc, 0x75, 0xff, 0x01, 0xd8, + 0x5a, 0xff, 0x00, 0x30, 0x00, 0x00, 0x01, 0xb1, 0x5a, 0xff, 0x01, 0xb5, + 0x5a, 0xff, 0x01, 0xbf, 0x5a, 0xff, 0x01, 0xee, 0x5a, 0xff, 0x01, 0xd6, + 0x5a, 0xff, 0x01, 0xeb, 0x5a, 0xff, 0x01, 0xd0, 0xff, 0xff, 0x01, 0xbd, + 0x5a, 0xff, 0x01, 0xc8, 0x75, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, + 0x68, 0xff, 0x00, 0x60, 0xfc, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, + 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x28, + 0x00, 0x00, 0x00, 0xd8, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, + 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, + 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, + 0x00, 0x00, 0x00, 0xe0, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x22, + 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0x30, 0x0c, 0x31, 0x0d, 0x78, 0x0e, + 0x7f, 0x0f, 0x80, 0x10, 0x81, 0x11, 0x86, 0x12, 0x89, 0x13, 0x8a, 0x13, + 0x8e, 0x14, 0x8f, 0x15, 0x90, 0x16, 0x93, 0x13, 0x94, 0x17, 0x95, 0x18, + 0x96, 0x19, 0x97, 0x1a, 0x9a, 0x1b, 0x9c, 0x19, 0x9d, 0x1c, 0x9e, 0x1d, + 0x9f, 0x1e, 0xa6, 0x1f, 0xa9, 0x1f, 0xae, 0x1f, 0xb1, 0x20, 0xb2, 0x20, + 0xb7, 0x21, 0xbf, 0x22, 0xc5, 0x23, 0xc8, 0x23, 0xcb, 0x23, 0xdd, 0x24, + 0xf2, 0x23, 0xf6, 0x25, 0xf7, 0x26, 0x20, 0x2d, 0x3a, 0x2e, 0x3d, 0x2f, + 0x3e, 0x30, 0x3f, 0x31, 0x40, 0x31, 0x43, 0x32, 0x44, 0x33, 0x45, 0x34, + 0x50, 0x35, 0x51, 0x36, 0x52, 0x37, 0x53, 0x38, 0x54, 0x39, 0x59, 0x3a, + 0x5b, 0x3b, 0x5c, 0x3c, 0x61, 0x3d, 0x63, 0x3e, 0x65, 0x3f, 0x66, 0x40, + 0x68, 0x41, 0x69, 0x42, 0x6a, 0x40, 0x6b, 0x43, 0x6c, 0x44, 0x6f, 0x42, + 0x71, 0x45, 0x72, 0x46, 0x75, 0x47, 0x7d, 0x48, 0x82, 0x49, 0x87, 0x4a, + 0x89, 0x4b, 0x8a, 0x4c, 0x8b, 0x4c, 0x8c, 0x4d, 0x92, 0x4e, 0x9d, 0x4f, + 0x9e, 0x50, 0x45, 0x57, 0x7b, 0x1d, 0x7c, 0x1d, 0x7d, 0x1d, 0x7f, 0x58, + 0x86, 0x59, 0x88, 0x5a, 0x89, 0x5a, 0x8a, 0x5a, 0x8c, 0x5b, 0x8e, 0x5c, + 0x8f, 0x5c, 0xac, 0x5d, 0xad, 0x5e, 0xae, 0x5e, 0xaf, 0x5e, 0xc2, 0x5f, + 0xcc, 0x60, 0xcd, 0x61, 0xce, 0x61, 0xcf, 0x62, 0xd0, 0x63, 0xd1, 0x64, + 0xd5, 0x65, 0xd6, 0x66, 0xd7, 0x67, 0xf0, 0x68, 0xf1, 0x69, 0xf2, 0x6a, + 0xf3, 0x6b, 0xf4, 0x6c, 0xf5, 0x6d, 0xf9, 0x6e, 0xfd, 0x2d, 0xfe, 0x2d, + 0xff, 0x2d, 0x50, 0x69, 0x51, 0x69, 0x52, 0x69, 0x53, 0x69, 0x54, 0x69, + 0x55, 0x69, 0x56, 0x69, 0x57, 0x69, 0x58, 0x69, 0x59, 0x69, 0x5a, 0x69, + 0x5b, 0x69, 0x5c, 0x69, 0x5d, 0x69, 0x5e, 0x69, 0x5f, 0x69, 0x82, 0x00, + 0x83, 0x00, 0x84, 0x00, 0x85, 0x00, 0x86, 0x00, 0x87, 0x00, 0x88, 0x00, + 0x89, 0x00, 0xc0, 0x75, 0xcf, 0x76, 0x80, 0x89, 0x81, 0x8a, 0x82, 0x8b, + 0x85, 0x8c, 0x86, 0x8d, 0x70, 0x9d, 0x71, 0x9d, 0x76, 0x9e, 0x77, 0x9e, + 0x78, 0x9f, 0x79, 0x9f, 0x7a, 0xa0, 0x7b, 0xa0, 0x7c, 0xa1, 0x7d, 0xa1, + 0xb3, 0xa2, 0xba, 0xa3, 0xbb, 0xa3, 0xbc, 0xa4, 0xbe, 0xa5, 0xc3, 0xa2, + 0xcc, 0xa4, 0xda, 0xa6, 0xdb, 0xa6, 0xe5, 0x6a, 0xea, 0xa7, 0xeb, 0xa7, + 0xec, 0x6e, 0xf3, 0xa2, 0xf8, 0xa8, 0xf9, 0xa8, 0xfa, 0xa9, 0xfb, 0xa9, + 0xfc, 0xa4, 0x26, 0xb0, 0x2a, 0xb1, 0x2b, 0xb2, 0x4e, 0xb3, 0x84, 0x08, + 0x62, 0xba, 0x63, 0xbb, 0x64, 0xbc, 0x65, 0xbd, 0x66, 0xbe, 0x6d, 0xbf, + 0x6e, 0xc0, 0x6f, 0xc1, 0x70, 0xc2, 0x7e, 0xc3, 0x7f, 0xc3, 0x7d, 0xcf, + 0x8d, 0xd0, 0x94, 0xd1, 0xab, 0xd2, 0xac, 0xd3, 0xad, 
0xd4, 0xb0, 0xd5, + 0xb1, 0xd6, 0xb2, 0xd7, 0xc4, 0xd8, 0xc5, 0xd9, 0xc6, 0xda, 0x07, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x0d, 0x06, 0x06, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x0f, 0x10, 0x11, 0x12, 0x06, 0x13, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x14, 0x15, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x16, 0x17, 0x06, 0x06, + 0x06, 0x18, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x19, 0x06, 0x06, 0x06, 0x06, 0x1a, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x1b, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x1c, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x1d, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1e, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x54, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, + 0x2b, 0x2b, 0x5b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x4a, 0x56, + 0x56, 0x05, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x24, 0x50, 0x79, 0x31, 0x50, 0x31, + 0x50, 0x31, 0x38, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x4e, 0x31, 0x02, 0x4e, 0x0d, 0x0d, + 0x4e, 0x03, 0x4e, 0x00, 0x24, 0x6e, 0x00, 0x4e, 0x31, 0x26, 0x6e, 0x51, + 0x4e, 0x24, 0x50, 0x4e, 0x39, 0x14, 0x81, 0x1b, 0x1d, 0x1d, 0x53, 0x31, + 0x50, 0x31, 0x50, 0x0d, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x1b, 0x53, + 0x24, 0x50, 0x31, 0x02, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, + 0x5c, 0x7b, 0x14, 0x79, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x2d, 0x2b, 0x49, + 0x03, 0x48, 0x03, 0x78, 0x5c, 0x7b, 0x14, 0x00, 0x96, 0x0a, 0x01, 0x2b, + 0x28, 0x06, 0x06, 0x00, 0x2a, 0x06, 0x2a, 0x2a, 0x2b, 0x07, 0xbb, 0xb5, + 0x2b, 0x1e, 0x00, 0x2b, 0x07, 0x2b, 0x2b, 0x2b, 0x01, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0xcd, 0x46, 0xcd, 0x2b, 0x00, + 0x25, 0x2b, 0x07, 0x01, 0x06, 0x01, 0x55, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x55, 0x56, 0x56, 0x02, 0x24, 0x81, 0x81, 0x81, 0x81, 0x81, 0x15, 0x81, + 0x81, 0x81, 0x00, 0x00, 0x2b, 0x00, 0xb2, 0xd1, 0xb2, 0xd1, 0xb2, 0xd1, + 0xb2, 0xd1, 0x00, 0x00, 0xcd, 0xcc, 0x01, 0x00, 0xd7, 0xd7, 0xd7, 0xd7, + 0xd7, 0x83, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, + 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0x1c, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x02, 0x00, 0x00, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x4e, 0x31, 0x50, 0x31, 0x50, 0x4e, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x02, 0x87, 0xa6, + 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, 0x87, 0xa6, + 0x87, 0xa6, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x00, 0x00, 0x00, 0x54, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x0c, 0x00, 0x0c, 0x2a, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, + 0x2a, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, + 0x6c, 0x81, 0x15, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x6c, + 0x03, 0x41, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x2c, 0x56, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x6c, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x56, 0x7a, + 0x9e, 0x26, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, + 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x25, 0x06, 0x01, 0x2b, 0x2b, + 0x4f, 0x56, 0x56, 0x2c, 0x2b, 0x7f, 0x56, 0x56, 0x39, 0x2b, 0x2b, 0x55, + 0x56, 0x56, 0x2b, 0x2b, 0x4f, 0x56, 0x56, 0x2c, 0x2b, 0x7f, 0x56, 0x56, + 0x81, 0x37, 0x75, 0x5b, 0x7b, 0x5c, 0x2b, 0x2b, 0x4f, 0x56, 0x56, 0x02, + 0xac, 0x04, 0x00, 0x00, 0x39, 0x2b, 0x2b, 0x55, 0x56, 0x56, 0x2b, 0x2b, + 0x4f, 0x56, 0x56, 0x2c, 0x2b, 0x2b, 0x56, 0x56, 0x32, 0x13, 0x81, 0x57, + 0x00, 0x6f, 0x81, 0x7e, 0xc9, 0xd7, 0x7e, 0x2d, 0x81, 0x81, 0x0e, 0x7e, + 0x39, 0x7f, 0x6f, 0x57, 0x00, 0x81, 0x81, 0x7e, 0x15, 0x00, 0x7e, 0x03, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x07, 0x2b, 0x24, 0x2b, 0x97, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x80, 0x81, 0x81, 0x81, 0x81, 0x39, 0xbb, 0x2a, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 
0x2b, 0x2b, 0x2b, + 0x2b, 0x01, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, + 0x81, 0x81, 0x81, 0x81, 0x81, 0xc9, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, + 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xd0, 0x0d, 0x00, + 0x4e, 0x31, 0x02, 0xb4, 0xc1, 0xc1, 0xd7, 0xd7, 0x24, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0xd7, 0xd7, 0x53, 0xc1, 0x47, 0xd4, + 0xd7, 0xd7, 0xd7, 0x05, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x4e, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, + 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x31, 0x50, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x79, 0x5c, 0x7b, 0x5c, 0x7b, + 0x4f, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, + 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x7b, 0x5c, 0x2d, 0x2b, 0x2b, + 0x79, 0x14, 0x5c, 0x7b, 0x5c, 0x2d, 0x79, 0x2a, 0x5c, 0x27, 0x5c, 0x7b, + 0x5c, 0x7b, 0x5c, 0x7b, 0xa4, 0x00, 0x0a, 0xb4, 0x5c, 0x7b, 0x5c, 0x7b, + 0x4f, 0x03, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x07, 0x00, 0x48, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x55, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x24, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x07, 0x00, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x07, 0x00, 0x00, 0x00, 0x00, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, + 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x2b, + 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x55, 0x56, 0x56, + 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x56, 0x0e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x27, 0x51, 0x6f, 0x77, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, + 0x00, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0x8e, + 0x92, 0x97, 0x00, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xb4, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc9, 0x00, 0x00, 0x00, 0xdb, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0x00, 0x00, + 0x00, 0x00, 0xe1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe7, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xed, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, + 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x01, 0x20, + 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x04, 0x20, + 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x06, 0x20, 0x00, 0x00, 0x08, 0x20, + 0x00, 0x00, 0x09, 0x20, 0x00, 0x00, 0x0a, 0x20, 0x00, 0x00, 0x28, 0x20, + 0x00, 0x00, 0x29, 0x20, 0x00, 0x00, 0x5f, 0x20, 0x00, 0x00, 0x00, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x92, 0x05, 0x04, 0x6e, 0x61, + 0x6d, 0x65, 0x01, 0xc9, 0x04, 0x2c, 0x00, 0x2a, 0x5f, 0x5f, 0x69, 0x6d, + 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x5f, 0x77, 0x61, 0x73, 0x69, 0x5f, + 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, + 0x76, 0x69, 0x65, 0x77, 0x31, 0x5f, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x67, + 0x65, 0x74, 0x01, 0x30, 0x5f, 0x5f, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, + 0x65, 0x64, 0x5f, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, + 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, + 0x31, 0x5f, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x73, + 0x5f, 0x67, 0x65, 0x74, 0x02, 0x2b, 0x5f, 0x5f, 0x69, 0x6d, 0x70, 0x6f, + 0x72, 0x74, 0x65, 0x64, 0x5f, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, + 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, + 0x65, 0x77, 0x31, 0x5f, 0x70, 0x72, 0x6f, 0x63, 0x5f, 0x65, 0x78, 0x69, + 0x74, 0x03, 0x11, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x6d, 0x5f, 0x63, 0x61, + 0x6c, 0x6c, 0x5f, 0x63, 0x74, 0x6f, 0x72, 0x73, 0x04, 
0x13, 0x75, 0x6e, + 0x64, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x5f, 0x77, 0x65, 0x61, 0x6b, + 0x3a, 0x6d, 0x61, 0x69, 0x6e, 0x05, 0x12, 0x5f, 0x5f, 0x77, 0x61, 0x73, + 0x6d, 0x5f, 0x69, 0x6e, 0x69, 0x74, 0x5f, 0x6d, 0x65, 0x6d, 0x6f, 0x72, + 0x79, 0x06, 0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x07, 0x0a, 0x72, + 0x65, 0x73, 0x65, 0x74, 0x5f, 0x68, 0x65, 0x61, 0x70, 0x08, 0x06, 0x6d, + 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x09, 0x04, 0x66, 0x72, 0x65, 0x65, 0x0a, + 0x06, 0x63, 0x61, 0x6c, 0x6c, 0x6f, 0x63, 0x0b, 0x07, 0x72, 0x65, 0x61, + 0x6c, 0x6c, 0x6f, 0x63, 0x0c, 0x05, 0x5f, 0x45, 0x78, 0x69, 0x74, 0x0d, + 0x0b, 0x5f, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x5f, 0x76, 0x6f, 0x69, 0x64, + 0x0e, 0x0f, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x69, 0x5f, 0x61, 0x72, 0x67, + 0x73, 0x5f, 0x67, 0x65, 0x74, 0x0f, 0x15, 0x5f, 0x5f, 0x77, 0x61, 0x73, + 0x69, 0x5f, 0x61, 0x72, 0x67, 0x73, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x73, + 0x5f, 0x67, 0x65, 0x74, 0x10, 0x10, 0x5f, 0x5f, 0x77, 0x61, 0x73, 0x69, + 0x5f, 0x70, 0x72, 0x6f, 0x63, 0x5f, 0x65, 0x78, 0x69, 0x74, 0x11, 0x05, + 0x64, 0x75, 0x6d, 0x6d, 0x79, 0x12, 0x11, 0x5f, 0x5f, 0x77, 0x61, 0x73, + 0x6d, 0x5f, 0x63, 0x61, 0x6c, 0x6c, 0x5f, 0x64, 0x74, 0x6f, 0x72, 0x73, + 0x13, 0x06, 0x6d, 0x65, 0x6d, 0x63, 0x70, 0x79, 0x14, 0x06, 0x6d, 0x65, + 0x6d, 0x73, 0x65, 0x74, 0x15, 0x06, 0x73, 0x74, 0x72, 0x6c, 0x65, 0x6e, + 0x16, 0x08, 0x69, 0x73, 0x77, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x17, 0x06, + 0x6d, 0x65, 0x6d, 0x63, 0x6d, 0x70, 0x18, 0x06, 0x6d, 0x65, 0x6d, 0x63, + 0x68, 0x72, 0x19, 0x06, 0x73, 0x74, 0x72, 0x63, 0x6d, 0x70, 0x1a, 0x08, + 0x74, 0x6f, 0x77, 0x6c, 0x6f, 0x77, 0x65, 0x72, 0x1b, 0x07, 0x63, 0x61, + 0x73, 0x65, 0x6d, 0x61, 0x70, 0x1c, 0x08, 0x74, 0x6f, 0x77, 0x75, 0x70, + 0x70, 0x65, 0x72, 0x1d, 0x07, 0x73, 0x74, 0x72, 0x6e, 0x63, 0x6d, 0x70, + 0x1e, 0x08, 0x69, 0x73, 0x77, 0x75, 0x70, 0x70, 0x65, 0x72, 0x1f, 0x07, + 0x6d, 0x65, 0x6d, 0x6d, 0x6f, 0x76, 0x65, 0x20, 0x08, 0x69, 0x73, 0x77, + 0x6c, 0x6f, 0x77, 0x65, 0x72, 0x21, 0x07, 0x69, 0x73, 0x62, 0x6c, 0x61, + 0x6e, 0x6b, 0x22, 0x08, 0x69, 0x73, 0x77, 0x62, 0x6c, 0x61, 0x6e, 0x6b, + 0x23, 0x08, 0x69, 0x73, 0x77, 0x64, 0x69, 0x67, 0x69, 0x74, 0x24, 0x07, + 0x73, 0x74, 0x72, 0x6e, 0x63, 0x61, 0x74, 0x25, 0x09, 0x5f, 0x5f, 0x73, + 0x74, 0x70, 0x6e, 0x63, 0x70, 0x79, 0x26, 0x07, 0x73, 0x74, 0x72, 0x6e, + 0x63, 0x70, 0x79, 0x27, 0x09, 0x69, 0x73, 0x77, 0x78, 0x64, 0x69, 0x67, + 0x69, 0x74, 0x28, 0x06, 0x77, 0x63, 0x73, 0x6c, 0x65, 0x6e, 0x29, 0x06, + 0x77, 0x63, 0x73, 0x63, 0x68, 0x72, 0x2a, 0x08, 0x69, 0x73, 0x77, 0x73, + 0x70, 0x61, 0x63, 0x65, 0x2b, 0x08, 0x69, 0x73, 0x77, 0x61, 0x6c, 0x6e, + 0x75, 0x6d, 0x07, 0x33, 0x02, 0x00, 0x0f, 0x5f, 0x5f, 0x73, 0x74, 0x61, + 0x63, 0x6b, 0x5f, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x01, 0x1f, + 0x47, 0x4f, 0x54, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2e, 0x5f, 0x5f, 0x6d, 0x65, 0x6d, 0x6f, + 0x72, 0x79, 0x5f, 0x62, 0x61, 0x73, 0x65, 0x09, 0x0a, 0x01, 0x00, 0x07, + 0x2e, 0x72, 0x6f, 0x64, 0x61, 0x74, 0x61, 0x00, 0x76, 0x09, 0x70, 0x72, + 0x6f, 0x64, 0x75, 0x63, 0x65, 0x72, 0x73, 0x01, 0x0c, 0x70, 0x72, 0x6f, + 0x63, 0x65, 0x73, 0x73, 0x65, 0x64, 0x2d, 0x62, 0x79, 0x01, 0x05, 0x63, + 0x6c, 0x61, 0x6e, 0x67, 0x56, 0x31, 0x37, 0x2e, 0x30, 0x2e, 0x36, 0x20, + 0x28, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x69, 0x74, + 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6c, 0x6c, 0x76, 0x6d, + 0x2f, 0x6c, 0x6c, 0x76, 0x6d, 0x2d, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, + 0x74, 0x20, 0x36, 0x30, 0x30, 0x39, 0x37, 0x30, 0x38, 
0x62, 0x34, 0x33, + 0x36, 0x37, 0x31, 0x37, 0x31, 0x63, 0x63, 0x64, 0x62, 0x66, 0x34, 0x62, + 0x35, 0x39, 0x30, 0x35, 0x63, 0x62, 0x36, 0x61, 0x38, 0x30, 0x33, 0x37, + 0x35, 0x33, 0x66, 0x65, 0x31, 0x38, 0x29, 0x00, 0x39, 0x0f, 0x74, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x03, 0x2b, 0x0b, 0x62, 0x75, 0x6c, 0x6b, 0x2d, 0x6d, 0x65, 0x6d, + 0x6f, 0x72, 0x79, 0x2b, 0x0f, 0x6d, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x2d, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x2b, 0x08, 0x73, 0x69, + 0x67, 0x6e, 0x2d, 0x65, 0x78, 0x74 +}; +unsigned int STDLIB_WASM_LEN = 15582; diff --git a/lib/src/wasm_store.c b/lib/src/wasm_store.c new file mode 100644 index 0000000..374c352 --- /dev/null +++ b/lib/src/wasm_store.c @@ -0,0 +1,1846 @@ +#include "tree_sitter/api.h" +#include "./parser.h" +#include <stdint.h> + +#ifdef TREE_SITTER_FEATURE_WASM + +#include <string.h> +#include <stdio.h> +#include <wasmtime.h> +#include "./alloc.h" +#include "./array.h" +#include "./atomic.h" +#include "./language.h" +#include "./lexer.h" +#include "./wasm_store.h" +#include "./wasm/wasm-stdlib.h" + +#define array_len(a) (sizeof(a) / sizeof(a[0])) + +// The following symbols from the C and C++ standard libraries are available +// for external scanners to use. +const char *STDLIB_SYMBOLS[] = { + #include "./stdlib-symbols.txt" +}; + +// The contents of the `dylink.0` custom section of a wasm module, +// as specified by the current WebAssembly dynamic linking ABI proposal. +typedef struct { + uint32_t memory_size; + uint32_t memory_align; + uint32_t table_size; + uint32_t table_align; +} WasmDylinkInfo; + +// WasmLanguageId - A pointer used to identify a language. This language id is +// reference-counted, so that its ownership can be shared between the language +// itself and the instances of the language that are held in wasm stores. +typedef struct { + volatile uint32_t ref_count; + volatile uint32_t is_language_deleted; +} WasmLanguageId; + +// LanguageWasmModule - Additional data associated with a wasm-backed +// `TSLanguage`. This data is read-only and does not reference a particular +// wasm store, so it can be shared by all users of a `TSLanguage`. A pointer to +// this is stored on the language itself. +typedef struct { + volatile uint32_t ref_count; + WasmLanguageId *language_id; + wasmtime_module_t *module; + const char *name; + char *symbol_name_buffer; + char *field_name_buffer; + WasmDylinkInfo dylink_info; +} LanguageWasmModule; + +// LanguageWasmInstance - Additional data associated with an instantiation of +// a `TSLanguage` in a particular wasm store. The wasm store holds one of +// these structs for each language that it has instantiated. +typedef struct { + WasmLanguageId *language_id; + wasmtime_instance_t instance; + int32_t external_states_address; + int32_t lex_main_fn_index; + int32_t lex_keyword_fn_index; + int32_t scanner_create_fn_index; + int32_t scanner_destroy_fn_index; + int32_t scanner_serialize_fn_index; + int32_t scanner_deserialize_fn_index; + int32_t scanner_scan_fn_index; +} LanguageWasmInstance; + +typedef struct { + uint32_t reset_heap; + uint32_t proc_exit; + uint32_t abort; + uint32_t assert_fail; + uint32_t notify_memory_growth; + uint32_t debug_message; + uint32_t at_exit; + uint32_t args_get; + uint32_t args_sizes_get; +} BuiltinFunctionIndices; + +// TSWasmStore - A struct that allows a given `Parser` to use wasm-backed +// languages. This struct is mutable, and can only be used by one parser at a +// time.
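+// A minimal usage sketch (hypothetical host code, not part of this file;
+// `engine` and `parser` are assumed to already exist, error handling elided):
+//
+//   TSWasmError wasm_error;
+//   TSWasmStore *store = ts_wasm_store_new(engine, &wasm_error);
+//   if (store) ts_parser_set_wasm_store(parser, store);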
+struct TSWasmStore { + wasm_engine_t *engine; + wasmtime_store_t *store; + wasmtime_table_t function_table; + wasmtime_memory_t memory; + TSLexer *current_lexer; + LanguageWasmInstance *current_instance; + Array(LanguageWasmInstance) language_instances; + uint32_t current_memory_offset; + uint32_t current_function_table_offset; + uint32_t *stdlib_fn_indices; + BuiltinFunctionIndices builtin_fn_indices; + wasmtime_global_t stack_pointer_global; + wasm_globaltype_t *const_i32_type; + bool has_error; + uint32_t lexer_address; +}; + +typedef Array(char) StringData; + +// LanguageInWasmMemory - The memory layout of a `TSLanguage` when compiled to +// wasm32. This is used to copy static language data out of the wasm memory. +typedef struct { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + int32_t parse_table; + int32_t small_parse_table; + int32_t small_parse_table_map; + int32_t parse_actions; + int32_t symbol_names; + int32_t field_names; + int32_t field_map_slices; + int32_t field_map_entries; + int32_t symbol_metadata; + int32_t public_symbol_map; + int32_t alias_map; + int32_t alias_sequences; + int32_t lex_modes; + int32_t lex_fn; + int32_t keyword_lex_fn; + TSSymbol keyword_capture_token; + struct { + int32_t states; + int32_t symbol_map; + int32_t create; + int32_t destroy; + int32_t scan; + int32_t serialize; + int32_t deserialize; + } external_scanner; + int32_t primary_state_ids; +} LanguageInWasmMemory; + +// LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32. +// This is used to copy mutable lexing state in and out of the wasm memory. 
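+// Note that the fields which are function pointers in the native `TSLexer`
+// (advance, mark_end, get_column, ...) are plain `int32_t` values here: on
+// wasm32 an indirect call target is an index into the module's function
+// table rather than a native address, so the store writes function-table
+// indices into these slots when it initializes the in-memory lexer.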
+typedef struct { + int32_t lookahead; + TSSymbol result_symbol; + int32_t advance; + int32_t mark_end; + int32_t get_column; + int32_t is_at_included_range_start; + int32_t eof; +} LexerInWasmMemory; + +static volatile uint32_t NEXT_LANGUAGE_ID; + +// Linear memory layout: +// [ <-- stack | stdlib statics | lexer | language statics --> | serialization_buffer | heap --> ] +#define MAX_MEMORY_SIZE (128 * 1024 * 1024 / MEMORY_PAGE_SIZE) + +/************************ + * WasmDylinkMemoryInfo + ***********************/ + +static uint8_t read_u8(const uint8_t **p, const uint8_t *end) { + return *(*p)++; +} + +static inline uint64_t read_uleb128(const uint8_t **p, const uint8_t *end) { + uint64_t value = 0; + unsigned shift = 0; + do { + if (*p == end) return UINT64_MAX; + value += (uint64_t)(**p & 0x7f) << shift; + shift += 7; + } while (*((*p)++) >= 128); + return value; +} + +static bool wasm_dylink_info__parse( + const uint8_t *bytes, + size_t length, + WasmDylinkInfo *info +) { + const uint8_t WASM_MAGIC_NUMBER[4] = {0, 'a', 's', 'm'}; + const uint8_t WASM_VERSION[4] = {1, 0, 0, 0}; + const uint8_t WASM_CUSTOM_SECTION = 0x0; + const uint8_t WASM_DYLINK_MEM_INFO = 0x1; + + const uint8_t *p = bytes; + const uint8_t *end = bytes + length; + + if (length < 8) return false; + if (memcmp(p, WASM_MAGIC_NUMBER, 4) != 0) return false; + p += 4; + if (memcmp(p, WASM_VERSION, 4) != 0) return false; + p += 4; + + while (p < end) { + uint8_t section_id = read_u8(&p, end); + uint32_t section_length = read_uleb128(&p, end); + const uint8_t *section_end = p + section_length; + if (section_end > end) return false; + + if (section_id == WASM_CUSTOM_SECTION) { + uint32_t name_length = read_uleb128(&p, section_end); + const uint8_t *name_end = p + name_length; + if (name_end > section_end) return false; + + if (name_length == 8 && memcmp(p, "dylink.0", 8) == 0) { + p = name_end; + while (p < section_end) { + uint8_t subsection_type = read_u8(&p, section_end); + uint32_t subsection_size = read_uleb128(&p, section_end); + const uint8_t *subsection_end = p + subsection_size; + if (subsection_end > section_end) return false; + if (subsection_type == WASM_DYLINK_MEM_INFO) { + info->memory_size = read_uleb128(&p, subsection_end); + info->memory_align = read_uleb128(&p, subsection_end); + info->table_size = read_uleb128(&p, subsection_end); + info->table_align = read_uleb128(&p, subsection_end); + return true; + } + p = subsection_end; + } + } + } + p = section_end; + } + return false; +} + +/******************************************* + * Native callbacks exposed to wasm modules + *******************************************/ + + static wasm_trap_t *callback__abort( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + return wasmtime_trap_new("wasm module called abort", 24); +} + +static wasm_trap_t *callback__debug_message( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + wasmtime_context_t *context = wasmtime_caller_context(caller); + TSWasmStore *store = env; + assert(args_and_results_len == 2); + uint32_t string_address = args_and_results[0].i32; + uint32_t value = args_and_results[1].i32; + uint8_t *memory = wasmtime_memory_data(context, &store->memory); + printf("DEBUG: %s %u\n", &memory[string_address], value); + return NULL; +} + +static wasm_trap_t *callback__noop( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len 
+) { + return NULL; +} + +static wasm_trap_t *callback__lexer_advance( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + wasmtime_context_t *context = wasmtime_caller_context(caller); + assert(args_and_results_len == 2); + + TSWasmStore *store = env; + TSLexer *lexer = store->current_lexer; + bool skip = args_and_results[1].i32; + lexer->advance(lexer, skip); + + uint8_t *memory = wasmtime_memory_data(context, &store->memory); + memcpy(&memory[store->lexer_address], &lexer->lookahead, sizeof(lexer->lookahead)); + return NULL; +} + +static wasm_trap_t *callback__lexer_mark_end( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + TSWasmStore *store = env; + TSLexer *lexer = store->current_lexer; + lexer->mark_end(lexer); + return NULL; +} + +static wasm_trap_t *callback__lexer_get_column( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + TSWasmStore *store = env; + TSLexer *lexer = store->current_lexer; + uint32_t result = lexer->get_column(lexer); + args_and_results[0].i32 = result; + return NULL; +} + +static wasm_trap_t *callback__lexer_is_at_included_range_start( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + TSWasmStore *store = env; + TSLexer *lexer = store->current_lexer; + bool result = lexer->is_at_included_range_start(lexer); + args_and_results[0].i32 = result; + return NULL; +} + +static wasm_trap_t *callback__lexer_eof( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + TSWasmStore *store = env; + TSLexer *lexer = store->current_lexer; + bool result = lexer->eof(lexer); + args_and_results[0].i32 = result; + return NULL; +} + +typedef struct { + uint32_t *storage_location; + wasmtime_func_unchecked_callback_t callback; + wasm_functype_t *type; +} FunctionDefinition; + +static void *copy(const void *data, size_t size) { + void *result = ts_malloc(size); + memcpy(result, data, size); + return result; +} + +static void *copy_unsized_static_array( + const uint8_t *data, + int32_t start_address, + const int32_t all_addresses[], + size_t address_count +) { + int32_t end_address = 0; + for (unsigned i = 0; i < address_count; i++) { + if (all_addresses[i] > start_address) { + if (!end_address || all_addresses[i] < end_address) { + end_address = all_addresses[i]; + } + } + } + + if (!end_address) return NULL; + size_t size = end_address - start_address; + void *result = ts_malloc(size); + memcpy(result, &data[start_address], size); + return result; +} + +static void *copy_strings( + const uint8_t *data, + int32_t array_address, + size_t count, + StringData *string_data +) { + const char **result = ts_malloc(count * sizeof(char *)); + for (unsigned i = 0; i < count; i++) { + int32_t address; + memcpy(&address, &data[array_address + i * sizeof(address)], sizeof(address)); + if (address == 0) { + result[i] = (const char *)-1; + } else { + const uint8_t *string = &data[address]; + uint32_t len = strlen((const char *)string); + result[i] = (const char *)(uintptr_t)string_data->size; + array_extend(string_data, len + 1, string); + } + } + for (unsigned i = 0; i < count; i++) { + if (result[i] == (const char *)-1) { + result[i] = NULL; + } else { + result[i] = string_data->contents + (uintptr_t)result[i]; + } + } + return result; +} + +static bool 
name_eq(const wasm_name_t *name, const char *string) { + return strncmp(string, name->data, name->size) == 0; +} + +static inline wasm_functype_t* wasm_functype_new_4_0( + wasm_valtype_t* p1, + wasm_valtype_t* p2, + wasm_valtype_t* p3, + wasm_valtype_t* p4 +) { + wasm_valtype_t* ps[4] = {p1, p2, p3, p4}; + wasm_valtype_vec_t params, results; + wasm_valtype_vec_new(&params, 4, ps); + wasm_valtype_vec_new_empty(&results); + return wasm_functype_new(&params, &results); +} + +#define format(output, ...) \ + do { \ + size_t message_length = snprintf((char *)NULL, 0, __VA_ARGS__); \ + *output = ts_malloc(message_length + 1); \ + snprintf(*output, message_length + 1, __VA_ARGS__); \ + } while (0) + +WasmLanguageId *language_id_new() { + WasmLanguageId *self = ts_malloc(sizeof(WasmLanguageId)); + self->is_language_deleted = false; + self->ref_count = 1; + return self; +} + +WasmLanguageId *language_id_clone(WasmLanguageId *self) { + atomic_inc(&self->ref_count); + return self; +} + +void language_id_delete(WasmLanguageId *self) { + if (atomic_dec(&self->ref_count) == 0) { + ts_free(self); + } +} + +static wasmtime_extern_t get_builtin_extern( + wasmtime_table_t *table, + unsigned index +) { + return (wasmtime_extern_t) { + .kind = WASMTIME_EXTERN_FUNC, + .of.func = (wasmtime_func_t) { + .store_id = table->store_id, + .index = index + } + }; +} + +static bool ts_wasm_store__provide_builtin_import( + TSWasmStore *self, + const wasm_name_t *import_name, + wasmtime_extern_t *import +) { + wasmtime_error_t *error = NULL; + wasmtime_context_t *context = wasmtime_store_context(self->store); + + // Dynamic linking parameters + if (name_eq(import_name, "__memory_base")) { + wasmtime_val_t value = WASM_I32_VAL(self->current_memory_offset); + wasmtime_global_t global; + error = wasmtime_global_new(context, self->const_i32_type, &value, &global); + assert(!error); + *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; + } else if (name_eq(import_name, "__table_base")) { + wasmtime_val_t value = WASM_I32_VAL(self->current_function_table_offset); + wasmtime_global_t global; + error = wasmtime_global_new(context, self->const_i32_type, &value, &global); + assert(!error); + *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = global}; + } else if (name_eq(import_name, "__stack_pointer")) { + *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_GLOBAL, .of.global = self->stack_pointer_global}; + } else if (name_eq(import_name, "__indirect_function_table")) { + *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_TABLE, .of.table = self->function_table}; + } else if (name_eq(import_name, "memory")) { + *import = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_MEMORY, .of.memory = self->memory}; + } + + // Builtin functions + else if (name_eq(import_name, "__assert_fail")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.assert_fail); + } else if (name_eq(import_name, "__cxa_atexit")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.at_exit); + } else if (name_eq(import_name, "args_get")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_get); + } else if (name_eq(import_name, "args_sizes_get")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.args_sizes_get); + } else if (name_eq(import_name, "abort")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.abort); + } else if (name_eq(import_name, "proc_exit")) {
+ *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.proc_exit); + } else if (name_eq(import_name, "emscripten_notify_memory_growth")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.notify_memory_growth); + } else if (name_eq(import_name, "tree_sitter_debug_message")) { + *import = get_builtin_extern(&self->function_table, self->builtin_fn_indices.debug_message); + } else { + return false; + } + + return true; +} + +static bool ts_wasm_store__call_module_initializer( + TSWasmStore *self, + const wasm_name_t *export_name, + wasmtime_extern_t *export, + wasm_trap_t **trap +) { + if ( + name_eq(export_name, "_initialize") || + name_eq(export_name, "__wasm_apply_data_relocs") || + name_eq(export_name, "__wasm_call_ctors") + ) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + wasmtime_func_t initialization_func = export->of.func; + wasmtime_error_t *error = wasmtime_func_call(context, &initialization_func, NULL, 0, NULL, 0, trap); + assert(!error); + return true; + } else { + return false; + } +} + +TSWasmStore *ts_wasm_store_new(TSWasmEngine *engine, TSWasmError *wasm_error) { + TSWasmStore *self = ts_calloc(1, sizeof(TSWasmStore)); + wasmtime_store_t *store = wasmtime_store_new(engine, self, NULL); + wasmtime_context_t *context = wasmtime_store_context(store); + wasmtime_error_t *error = NULL; + wasm_trap_t *trap = NULL; + wasm_message_t message = WASM_EMPTY_VEC; + wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; + wasmtime_extern_t *imports = NULL; + wasmtime_module_t *stdlib_module = NULL; + wasm_memorytype_t *memory_type = NULL; + wasm_tabletype_t *table_type = NULL; + + // Define functions called by scanners via function pointers on the lexer. + LexerInWasmMemory lexer = { + .lookahead = 0, + .result_symbol = 0, + }; + FunctionDefinition lexer_definitions[] = { + { + (uint32_t *)&lexer.advance, + callback__lexer_advance, + wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + (uint32_t *)&lexer.mark_end, + callback__lexer_mark_end, + wasm_functype_new_1_0(wasm_valtype_new_i32()) + }, + { + (uint32_t *)&lexer.get_column, + callback__lexer_get_column, + wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + (uint32_t *)&lexer.is_at_included_range_start, + callback__lexer_is_at_included_range_start, + wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + (uint32_t *)&lexer.eof, + callback__lexer_eof, + wasm_functype_new_1_1(wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + }; + + // Define builtin functions that can be imported by scanners. 
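+ // These cover the libc and WASI symbols that a generated scanner may
+ // import: the fatal ones (proc_exit, abort, __assert_fail) trap via
+ // callback__abort, tree_sitter_debug_message prints via
+ // callback__debug_message, and the rest are no-ops.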
+ BuiltinFunctionIndices builtin_fn_indices; + FunctionDefinition builtin_definitions[] = { + { + &builtin_fn_indices.proc_exit, + callback__abort, + wasm_functype_new_1_0(wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.abort, + callback__abort, + wasm_functype_new_0_0() + }, + { + &builtin_fn_indices.assert_fail, + callback__abort, + wasm_functype_new_4_0(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.notify_memory_growth, + callback__noop, + wasm_functype_new_1_0(wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.debug_message, + callback__debug_message, + wasm_functype_new_2_0(wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.at_exit, + callback__noop, + wasm_functype_new_3_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.args_get, + callback__noop, + wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + { + &builtin_fn_indices.args_sizes_get, + callback__noop, + wasm_functype_new_2_1(wasm_valtype_new_i32(), wasm_valtype_new_i32(), wasm_valtype_new_i32()) + }, + }; + + // Create all of the wasm functions. + unsigned builtin_definitions_len = array_len(builtin_definitions); + unsigned lexer_definitions_len = array_len(lexer_definitions); + for (unsigned i = 0; i < builtin_definitions_len; i++) { + FunctionDefinition *definition = &builtin_definitions[i]; + wasmtime_func_t func; + wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); + *definition->storage_location = func.index; + wasm_functype_delete(definition->type); + } + for (unsigned i = 0; i < lexer_definitions_len; i++) { + FunctionDefinition *definition = &lexer_definitions[i]; + wasmtime_func_t func; + wasmtime_func_new_unchecked(context, definition->type, definition->callback, self, NULL, &func); + *definition->storage_location = func.index; + wasm_functype_delete(definition->type); + } + + // Compile the stdlib module. + error = wasmtime_module_new(engine, STDLIB_WASM, STDLIB_WASM_LEN, &stdlib_module); + if (error) { + wasmtime_error_message(error, &message); + wasm_error->kind = TSWasmErrorKindCompile; + format( + &wasm_error->message, + "failed to compile wasm stdlib: %.*s", + (int)message.size, message.data + ); + goto error; + } + + // Retrieve the stdlib module's imports. + wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; + wasmtime_module_imports(stdlib_module, &import_types); + + // Find the initial number of memory pages needed by the stdlib. 
+ const wasm_memorytype_t *stdlib_memory_type; + for (unsigned i = 0; i < import_types.size; i++) { + wasm_importtype_t *import_type = import_types.data[i]; + const wasm_name_t *import_name = wasm_importtype_name(import_type); + if (name_eq(import_name, "memory")) { + const wasm_externtype_t *type = wasm_importtype_type(import_type); + stdlib_memory_type = wasm_externtype_as_memorytype_const(type); + } + } + if (!stdlib_memory_type) { + wasm_error->kind = TSWasmErrorKindCompile; + format( + &wasm_error->message, + "wasm stdlib is missing the 'memory' import" + ); + goto error; + } + + // Initialize store's memory + uint64_t initial_memory_pages = wasmtime_memorytype_minimum(stdlib_memory_type); + wasm_limits_t memory_limits = {.min = initial_memory_pages, .max = MAX_MEMORY_SIZE}; + memory_type = wasm_memorytype_new(&memory_limits); + wasmtime_memory_t memory; + error = wasmtime_memory_new(context, memory_type, &memory); + if (error) { + wasmtime_error_message(error, &message); + wasm_error->kind = TSWasmErrorKindAllocate; + format( + &wasm_error->message, + "failed to allocate wasm memory: %.*s", + (int)message.size, message.data + ); + goto error; + } + wasm_memorytype_delete(memory_type); + memory_type = NULL; + + // Initialize store's function table + wasm_limits_t table_limits = {.min = 1, .max = wasm_limits_max_default}; + table_type = wasm_tabletype_new(wasm_valtype_new(WASM_FUNCREF), &table_limits); + wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; + wasmtime_table_t function_table; + error = wasmtime_table_new(context, table_type, &initializer, &function_table); + if (error) { + wasmtime_error_message(error, &message); + wasm_error->kind = TSWasmErrorKindAllocate; + format( + &wasm_error->message, + "failed to allocate wasm table: %.*s", + (int)message.size, message.data + ); + goto error; + } + wasm_tabletype_delete(table_type); + table_type = NULL; + + unsigned stdlib_symbols_len = array_len(STDLIB_SYMBOLS); + + // Define globals for the stack and heap start addresses. + wasm_globaltype_t *const_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_CONST); + wasm_globaltype_t *var_i32_type = wasm_globaltype_new(wasm_valtype_new_i32(), WASM_VAR); + + wasmtime_val_t stack_pointer_value = WASM_I32_VAL(0); + wasmtime_global_t stack_pointer_global; + error = wasmtime_global_new(context, var_i32_type, &stack_pointer_value, &stack_pointer_global); + assert(!error); + + *self = (TSWasmStore) { + .engine = engine, + .store = store, + .memory = memory, + .function_table = function_table, + .language_instances = array_new(), + .stdlib_fn_indices = ts_calloc(stdlib_symbols_len, sizeof(uint32_t)), + .builtin_fn_indices = builtin_fn_indices, + .stack_pointer_global = stack_pointer_global, + .current_memory_offset = 0, + .current_function_table_offset = 0, + .const_i32_type = const_i32_type, + }; + + // Set up the imports for the stdlib module. + imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); + for (unsigned i = 0; i < import_types.size; i++) { + wasm_importtype_t *type = import_types.data[i]; + const wasm_name_t *import_name = wasm_importtype_name(type); + if (!ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { + wasm_error->kind = TSWasmErrorKindInstantiate; + format( + &wasm_error->message, + "unexpected import in wasm stdlib: %.*s\n", + (int)import_name->size, import_name->data + ); + goto error; + } + } + + // Instantiate the stdlib module. 
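+ // Instantiation can fail in two ways: with an error (e.g. an unresolved
+ // import) or with a trap raised while running the module's initializers,
+ // so both outcomes are checked below.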
+  wasmtime_instance_t instance;
+  error = wasmtime_instance_new(context, stdlib_module, imports, import_types.size, &instance, &trap);
+  ts_free(imports);
+  imports = NULL;
+  if (error) {
+    wasmtime_error_message(error, &message);
+    wasm_error->kind = TSWasmErrorKindInstantiate;
+    format(
+      &wasm_error->message,
+      "failed to instantiate wasm stdlib module: %.*s",
+      (int)message.size, message.data
+    );
+    goto error;
+  }
+  if (trap) {
+    wasm_trap_message(trap, &message);
+    wasm_error->kind = TSWasmErrorKindInstantiate;
+    format(
+      &wasm_error->message,
+      "trapped when instantiating wasm stdlib module: %.*s",
+      (int)message.size, message.data
+    );
+    goto error;
+  }
+  wasm_importtype_vec_delete(&import_types);
+
+  // Process the stdlib module's exports.
+  for (unsigned i = 0; i < stdlib_symbols_len; i++) {
+    self->stdlib_fn_indices[i] = UINT32_MAX;
+  }
+  self->builtin_fn_indices.reset_heap = UINT32_MAX;
+  wasmtime_module_exports(stdlib_module, &export_types);
+  for (unsigned i = 0; i < export_types.size; i++) {
+    wasm_exporttype_t *export_type = export_types.data[i];
+    const wasm_name_t *name = wasm_exporttype_name(export_type);
+
+    char *export_name;
+    size_t name_len;
+    wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL};
+    bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export);
+    assert(exists);
+
+    if (export.kind == WASMTIME_EXTERN_GLOBAL) {
+      if (name_eq(name, "__stack_pointer")) {
+        self->stack_pointer_global = export.of.global;
+      }
+    }
+
+    if (export.kind == WASMTIME_EXTERN_FUNC) {
+      if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) {
+        if (trap) {
+          wasm_trap_message(trap, &message);
+          wasm_error->kind = TSWasmErrorKindInstantiate;
+          format(
+            &wasm_error->message,
+            "trap when calling stdlib relocation function: %.*s\n",
+            (int)message.size, message.data
+          );
+          goto error;
+        }
+        continue;
+      }
+
+      if (name_eq(name, "reset_heap")) {
+        self->builtin_fn_indices.reset_heap = export.of.func.index;
+        continue;
+      }
+
+      for (unsigned j = 0; j < stdlib_symbols_len; j++) {
+        if (name_eq(name, STDLIB_SYMBOLS[j])) {
+          self->stdlib_fn_indices[j] = export.of.func.index;
+          break;
+        }
+      }
+    }
+  }
+
+  if (self->builtin_fn_indices.reset_heap == UINT32_MAX) {
+    wasm_error->kind = TSWasmErrorKindInstantiate;
+    format(
+      &wasm_error->message,
+      "missing malloc reset function in wasm stdlib"
+    );
+    goto error;
+  }
+
+  for (unsigned i = 0; i < stdlib_symbols_len; i++) {
+    if (self->stdlib_fn_indices[i] == UINT32_MAX) {
+      wasm_error->kind = TSWasmErrorKindInstantiate;
+      format(
+        &wasm_error->message,
+        "missing exported symbol in wasm stdlib: %s",
+        STDLIB_SYMBOLS[i]
+      );
+      goto error;
+    }
+  }
+
+  wasm_exporttype_vec_delete(&export_types);
+  wasmtime_module_delete(stdlib_module);
+
+  // Add all of the lexer callback functions to the function table. Store their function table
+  // indices on the in-memory lexer.
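+  // Inside wasm, a "function pointer" is an index into the instance's funcref
+  // table (invoked via `call_indirect`), not a native address. Each host-side
+  // lexer callback is therefore appended to the shared function table, and its
+  // table index is written into the corresponding field of the lexer that is
+  // copied into wasm memory below.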
+ uint32_t table_index; + error = wasmtime_table_grow(context, &function_table, lexer_definitions_len, &initializer, &table_index); + if (error) { + wasmtime_error_message(error, &message); + wasm_error->kind = TSWasmErrorKindAllocate; + format( + &wasm_error->message, + "failed to grow wasm table to initial size: %.*s", + (int)message.size, message.data + ); + goto error; + } + for (unsigned i = 0; i < lexer_definitions_len; i++) { + FunctionDefinition *definition = &lexer_definitions[i]; + wasmtime_func_t func = {function_table.store_id, *definition->storage_location}; + wasmtime_val_t func_val = {.kind = WASMTIME_FUNCREF, .of.funcref = func}; + error = wasmtime_table_set(context, &function_table, table_index, &func_val); + assert(!error); + *(int32_t *)(definition->storage_location) = table_index; + table_index++; + } + + self->current_function_table_offset = table_index; + self->lexer_address = initial_memory_pages * MEMORY_PAGE_SIZE; + self->current_memory_offset = self->lexer_address + sizeof(LexerInWasmMemory); + + // Grow the memory enough to hold the builtin lexer and serialization buffer. + uint32_t new_pages_needed = (self->current_memory_offset - self->lexer_address - 1) / MEMORY_PAGE_SIZE + 1; + uint64_t prev_memory_size; + wasmtime_memory_grow(context, &memory, new_pages_needed, &prev_memory_size); + + uint8_t *memory_data = wasmtime_memory_data(context, &memory); + memcpy(&memory_data[self->lexer_address], &lexer, sizeof(lexer)); + return self; + +error: + ts_free(self); + if (stdlib_module) wasmtime_module_delete(stdlib_module); + if (store) wasmtime_store_delete(store); + if (import_types.size) wasm_importtype_vec_delete(&import_types); + if (memory_type) wasm_memorytype_delete(memory_type); + if (table_type) wasm_tabletype_delete(table_type); + if (trap) wasm_trap_delete(trap); + if (error) wasmtime_error_delete(error); + if (message.size) wasm_byte_vec_delete(&message); + if (export_types.size) wasm_exporttype_vec_delete(&export_types); + if (imports) ts_free(imports); + return NULL; +} + +void ts_wasm_store_delete(TSWasmStore *self) { + if (!self) return; + ts_free(self->stdlib_fn_indices); + wasm_globaltype_delete(self->const_i32_type); + wasmtime_store_delete(self->store); + wasm_engine_delete(self->engine); + for (unsigned i = 0; i < self->language_instances.size; i++) { + LanguageWasmInstance *instance = &self->language_instances.contents[i]; + language_id_delete(instance->language_id); + } + array_delete(&self->language_instances); + ts_free(self); +} + +size_t ts_wasm_store_language_count(const TSWasmStore *self) { + size_t result = 0; + for (unsigned i = 0; i < self->language_instances.size; i++) { + const WasmLanguageId *id = self->language_instances.contents[i].language_id; + if (!id->is_language_deleted) { + result++; + } + } + return result; +} + +static uint32_t ts_wasm_store__heap_address(TSWasmStore *self) { + return self->current_memory_offset + TREE_SITTER_SERIALIZATION_BUFFER_SIZE; +} + +static uint32_t ts_wasm_store__serialization_buffer_address(TSWasmStore *self) { + return self->current_memory_offset; +} + +static bool ts_wasm_store__instantiate( + TSWasmStore *self, + wasmtime_module_t *module, + const char *language_name, + const WasmDylinkInfo *dylink_info, + wasmtime_instance_t *result, + int32_t *language_address, + char **error_message +) { + wasmtime_error_t *error = NULL; + wasm_trap_t *trap = NULL; + wasm_message_t message = WASM_EMPTY_VEC; + char *language_function_name = NULL; + wasmtime_extern_t *imports = NULL; + wasmtime_context_t 
*context = wasmtime_store_context(self->store); + + // Grow the function table to make room for the new functions. + wasmtime_val_t initializer = {.kind = WASMTIME_FUNCREF}; + uint32_t prev_table_size; + error = wasmtime_table_grow(context, &self->function_table, dylink_info->table_size, &initializer, &prev_table_size); + if (error) { + format(error_message, "invalid function table size %u", dylink_info->table_size); + goto error; + } + + // Grow the memory to make room for the new data. + uint32_t needed_memory_size = ts_wasm_store__heap_address(self) + dylink_info->memory_size; + uint32_t current_memory_size = wasmtime_memory_data_size(context, &self->memory); + if (needed_memory_size > current_memory_size) { + uint32_t pages_to_grow = ( + needed_memory_size - current_memory_size + MEMORY_PAGE_SIZE - 1) / + MEMORY_PAGE_SIZE; + uint64_t prev_memory_size; + error = wasmtime_memory_grow(context, &self->memory, pages_to_grow, &prev_memory_size); + if (error) { + format(error_message, "invalid memory size %u", dylink_info->memory_size); + goto error; + } + } + + // Construct the language function name as string. + format(&language_function_name, "tree_sitter_%s", language_name); + + const uint64_t store_id = self->function_table.store_id; + + // Build the imports list for the module. + wasm_importtype_vec_t import_types = WASM_EMPTY_VEC; + wasmtime_module_imports(module, &import_types); + imports = ts_calloc(import_types.size, sizeof(wasmtime_extern_t)); + + for (unsigned i = 0; i < import_types.size; i++) { + const wasm_importtype_t *import_type = import_types.data[i]; + const wasm_name_t *import_name = wasm_importtype_name(import_type); + if (import_name->size == 0) { + format(error_message, "empty import name"); + goto error; + } + + if (ts_wasm_store__provide_builtin_import(self, import_name, &imports[i])) { + continue; + } + + bool defined_in_stdlib = false; + for (unsigned j = 0; j < array_len(STDLIB_SYMBOLS); j++) { + if (name_eq(import_name, STDLIB_SYMBOLS[j])) { + uint16_t address = self->stdlib_fn_indices[j]; + imports[i] = (wasmtime_extern_t) {.kind = WASMTIME_EXTERN_FUNC, .of.func = {store_id, address}}; + defined_in_stdlib = true; + break; + } + } + + if (!defined_in_stdlib) { + format( + error_message, + "invalid import '%.*s'\n", + (int)import_name->size, import_name->data + ); + goto error; + } + } + + wasmtime_instance_t instance; + error = wasmtime_instance_new(context, module, imports, import_types.size, &instance, &trap); + wasm_importtype_vec_delete(&import_types); + ts_free(imports); + imports = NULL; + if (error) { + wasmtime_error_message(error, &message); + format( + error_message, + "error instantiating wasm module: %.*s\n", + (int)message.size, message.data + ); + goto error; + } + if (trap) { + wasm_trap_message(trap, &message); + format( + error_message, + "trap when instantiating wasm module: %.*s\n", + (int)message.size, message.data + ); + goto error; + } + + self->current_memory_offset += dylink_info->memory_size; + self->current_function_table_offset += dylink_info->table_size; + + // Process the module's exports. 
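+  // Dynamically-linked wasm objects conventionally export one-shot
+  // initializers: `__wasm_call_ctors` (exported explicitly by
+  // script/build-wasm-stdlib) and, typically, a data-relocation function such
+  // as `__wasm_apply_data_relocs`. `ts_wasm_store__call_module_initializer`
+  // recognizes these exports by name and runs them once after instantiation.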
+ bool found_language = false; + wasmtime_extern_t language_extern; + wasm_exporttype_vec_t export_types = WASM_EMPTY_VEC; + wasmtime_module_exports(module, &export_types); + for (unsigned i = 0; i < export_types.size; i++) { + wasm_exporttype_t *export_type = export_types.data[i]; + const wasm_name_t *name = wasm_exporttype_name(export_type); + + size_t name_len; + char *export_name; + wasmtime_extern_t export = {.kind = WASM_EXTERN_GLOBAL}; + bool exists = wasmtime_instance_export_nth(context, &instance, i, &export_name, &name_len, &export); + assert(exists); + + // If the module exports an initialization or data-relocation function, call it. + if (ts_wasm_store__call_module_initializer(self, name, &export, &trap)) { + if (trap) { + wasm_trap_message(trap, &message); + format( + error_message, + "trap when calling data relocation function: %.*s\n", + (int)message.size, message.data + ); + goto error; + } + } + + // Find the main language function for the module. + else if (name_eq(name, language_function_name)) { + language_extern = export; + found_language = true; + } + } + wasm_exporttype_vec_delete(&export_types); + + if (!found_language) { + format( + error_message, + "module did not contain language function: %s", + language_function_name + ); + goto error; + } + + // Invoke the language function to get the static address of the language object. + wasmtime_func_t language_func = language_extern.of.func; + wasmtime_val_t language_address_val; + error = wasmtime_func_call(context, &language_func, NULL, 0, &language_address_val, 1, &trap); + assert(!error); + if (trap) { + wasm_trap_message(trap, &message); + format( + error_message, + "trapped when calling language function: %s: %.*s\n", + language_function_name, (int)message.size, message.data + ); + goto error; + } + + if (language_address_val.kind != WASMTIME_I32) { + format( + error_message, + "language function did not return an integer: %s\n", + language_function_name + ); + goto error; + } + + ts_free(language_function_name); + *result = instance; + *language_address = language_address_val.of.i32; + return true; + +error: + if (language_function_name) ts_free(language_function_name); + if (message.size) wasm_byte_vec_delete(&message); + if (error) wasmtime_error_delete(error); + if (trap) wasm_trap_delete(trap); + if (imports) ts_free(imports); + return false; +} + +static bool ts_wasm_store__sentinel_lex_fn(TSLexer *_lexer, TSStateId state) { + return false; +} + +const TSLanguage *ts_wasm_store_load_language( + TSWasmStore *self, + const char *language_name, + const char *wasm, + uint32_t wasm_len, + TSWasmError *wasm_error +) { + WasmDylinkInfo dylink_info; + wasmtime_module_t *module = NULL; + wasmtime_error_t *error = NULL; + wasm_error->kind = TSWasmErrorKindNone; + + if (!wasm_dylink_info__parse((const unsigned char *)wasm, wasm_len, &dylink_info)) { + wasm_error->kind = TSWasmErrorKindParse; + format(&wasm_error->message, "failed to parse dylink section of wasm module"); + goto error; + } + + // Compile the wasm code. + error = wasmtime_module_new(self->engine, (const uint8_t *)wasm, wasm_len, &module); + if (error) { + wasm_message_t message; + wasmtime_error_message(error, &message); + wasm_error->kind = TSWasmErrorKindCompile; + format(&wasm_error->message, "error compiling wasm module: %.*s", (int)message.size, message.data); + wasm_byte_vec_delete(&message); + goto error; + } + + // Instantiate the module in this store. 
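+  // `ts_wasm_store__instantiate` grows this store's shared memory and function
+  // table by the amounts recorded in the module's `dylink.0` custom section,
+  // which `wasm_dylink_info__parse` extracted above. A plausible sketch of the
+  // parsed record (its real definition lives elsewhere in this library) is:
+  //
+  //   typedef struct {
+  //     uint32_t memory_size;  // bytes of static data the module needs
+  //     uint32_t table_size;   // funcref table slots the module needs
+  //     ...
+  //   } WasmDylinkInfo;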
+ wasmtime_instance_t instance; + int32_t language_address; + if (!ts_wasm_store__instantiate( + self, + module, + language_name, + &dylink_info, + &instance, + &language_address, + &wasm_error->message + )) { + wasm_error->kind = TSWasmErrorKindInstantiate; + goto error; + } + + // Copy all of the static data out of the language object in wasm memory, + // constructing a native language object. + LanguageInWasmMemory wasm_language; + wasmtime_context_t *context = wasmtime_store_context(self->store); + const uint8_t *memory = wasmtime_memory_data(context, &self->memory); + memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); + + if (wasm_language.version < LANGUAGE_VERSION_USABLE_VIA_WASM) { + wasm_error->kind = TSWasmErrorKindInstantiate; + format(&wasm_error->message, "language version %u is too old for wasm", wasm_language.version); + goto error; + } + + int32_t addresses[] = { + wasm_language.alias_map, + wasm_language.alias_sequences, + wasm_language.field_map_entries, + wasm_language.field_map_slices, + wasm_language.field_names, + wasm_language.keyword_lex_fn, + wasm_language.lex_fn, + wasm_language.lex_modes, + wasm_language.parse_actions, + wasm_language.parse_table, + wasm_language.primary_state_ids, + wasm_language.primary_state_ids, + wasm_language.public_symbol_map, + wasm_language.small_parse_table, + wasm_language.small_parse_table_map, + wasm_language.symbol_metadata, + wasm_language.symbol_metadata, + wasm_language.symbol_names, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.states : 0, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.symbol_map : 0, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.create : 0, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.destroy : 0, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.scan : 0, + wasm_language.external_token_count > 0 ? wasm_language.external_scanner.serialize : 0, + wasm_language.external_token_count > 0 ? 
wasm_language.external_scanner.deserialize : 0, + language_address, + self->current_memory_offset, + }; + uint32_t address_count = array_len(addresses); + + TSLanguage *language = ts_calloc(1, sizeof(TSLanguage)); + StringData symbol_name_buffer = array_new(); + StringData field_name_buffer = array_new(); + + *language = (TSLanguage) { + .version = wasm_language.version, + .symbol_count = wasm_language.symbol_count, + .alias_count = wasm_language.alias_count, + .token_count = wasm_language.token_count, + .external_token_count = wasm_language.external_token_count, + .state_count = wasm_language.state_count, + .large_state_count = wasm_language.large_state_count, + .production_id_count = wasm_language.production_id_count, + .field_count = wasm_language.field_count, + .max_alias_sequence_length = wasm_language.max_alias_sequence_length, + .keyword_capture_token = wasm_language.keyword_capture_token, + .parse_table = copy( + &memory[wasm_language.parse_table], + wasm_language.large_state_count * wasm_language.symbol_count * sizeof(uint16_t) + ), + .parse_actions = copy_unsized_static_array( + memory, + wasm_language.parse_actions, + addresses, + address_count + ), + .symbol_names = copy_strings( + memory, + wasm_language.symbol_names, + wasm_language.symbol_count + wasm_language.alias_count, + &symbol_name_buffer + ), + .symbol_metadata = copy( + &memory[wasm_language.symbol_metadata], + (wasm_language.symbol_count + wasm_language.alias_count) * sizeof(TSSymbolMetadata) + ), + .public_symbol_map = copy( + &memory[wasm_language.public_symbol_map], + (wasm_language.symbol_count + wasm_language.alias_count) * sizeof(TSSymbol) + ), + .lex_modes = copy( + &memory[wasm_language.lex_modes], + wasm_language.state_count * sizeof(TSLexMode) + ), + }; + + if (language->field_count > 0 && language->production_id_count > 0) { + language->field_map_slices = copy( + &memory[wasm_language.field_map_slices], + wasm_language.production_id_count * sizeof(TSFieldMapSlice) + ); + + // Determine the number of field map entries by finding the greatest index + // in any of the slices. + uint32_t field_map_entry_count = 0; + for (uint32_t i = 0; i < wasm_language.production_id_count; i++) { + TSFieldMapSlice slice = language->field_map_slices[i]; + uint32_t slice_end = slice.index + slice.length; + if (slice_end > field_map_entry_count) { + field_map_entry_count = slice_end; + } + } + + language->field_map_entries = copy( + &memory[wasm_language.field_map_entries], + field_map_entry_count * sizeof(TSFieldMapEntry) + ); + language->field_names = copy_strings( + memory, + wasm_language.field_names, + wasm_language.field_count + 1, + &field_name_buffer + ); + } + + if (language->max_alias_sequence_length > 0 && language->production_id_count > 0) { + // The alias map contains symbols, alias counts, and aliases, terminated by a null symbol. 
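+
+  // The table's total length is not recorded anywhere in the language struct,
+  // so it has to be discovered by walking the entries until the terminating
+  // null symbol is reached, which is what the loop below does.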
+ int32_t alias_map_size = 0; + for (;;) { + TSSymbol symbol; + memcpy(&symbol, &memory[wasm_language.alias_map + alias_map_size], sizeof(symbol)); + alias_map_size += sizeof(TSSymbol); + if (symbol == 0) break; + uint16_t value_count; + memcpy(&value_count, &memory[wasm_language.alias_map + alias_map_size], sizeof(value_count)); + alias_map_size += value_count * sizeof(TSSymbol); + } + language->alias_map = copy( + &memory[wasm_language.alias_map], + alias_map_size * sizeof(TSSymbol) + ); + language->alias_sequences = copy( + &memory[wasm_language.alias_sequences], + wasm_language.production_id_count * wasm_language.max_alias_sequence_length * sizeof(TSSymbol) + ); + } + + if (language->state_count > language->large_state_count) { + uint32_t small_state_count = wasm_language.state_count - wasm_language.large_state_count; + language->small_parse_table_map = copy( + &memory[wasm_language.small_parse_table_map], + small_state_count * sizeof(uint32_t) + ); + language->small_parse_table = copy_unsized_static_array( + memory, + wasm_language.small_parse_table, + addresses, + address_count + ); + } + + if (language->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { + language->primary_state_ids = copy( + &memory[wasm_language.primary_state_ids], + wasm_language.state_count * sizeof(TSStateId) + ); + } + + if (language->external_token_count > 0) { + language->external_scanner.symbol_map = copy( + &memory[wasm_language.external_scanner.symbol_map], + wasm_language.external_token_count * sizeof(TSSymbol) + ); + language->external_scanner.states = (void *)(uintptr_t)wasm_language.external_scanner.states; + } + + unsigned name_len = strlen(language_name); + char *name = ts_malloc(name_len + 1); + memcpy(name, language_name, name_len); + name[name_len] = '\0'; + + LanguageWasmModule *language_module = ts_malloc(sizeof(LanguageWasmModule)); + *language_module = (LanguageWasmModule) { + .language_id = language_id_new(), + .module = module, + .name = name, + .symbol_name_buffer = symbol_name_buffer.contents, + .field_name_buffer = field_name_buffer.contents, + .dylink_info = dylink_info, + .ref_count = 1, + }; + + // The lex functions are not used for wasm languages. Use those two fields + // to mark this language as WASM-based and to store the language's + // WASM-specific data. + language->lex_fn = ts_wasm_store__sentinel_lex_fn; + language->keyword_lex_fn = (void *)language_module; + + // Clear out any instances of languages that have been deleted. + for (unsigned i = 0; i < self->language_instances.size; i++) { + WasmLanguageId *id = self->language_instances.contents[i].language_id; + if (id->is_language_deleted) { + language_id_delete(id); + array_erase(&self->language_instances, i); + i--; + } + } + + // Store this store's instance of this language module. 
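+  // Other wasm stores that are later asked to parse with this TSLanguage will
+  // not find an entry here; they instantiate the shared module lazily via
+  // `ts_wasm_store_add_language`.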
+ array_push(&self->language_instances, ((LanguageWasmInstance) { + .language_id = language_id_clone(language_module->language_id), + .instance = instance, + .external_states_address = wasm_language.external_scanner.states, + .lex_main_fn_index = wasm_language.lex_fn, + .lex_keyword_fn_index = wasm_language.keyword_lex_fn, + .scanner_create_fn_index = wasm_language.external_scanner.create, + .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, + .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, + .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, + .scanner_scan_fn_index = wasm_language.external_scanner.scan, + })); + + return language; + +error: + if (module) wasmtime_module_delete(module); + return NULL; +} + +bool ts_wasm_store_add_language( + TSWasmStore *self, + const TSLanguage *language, + uint32_t *index +) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + const LanguageWasmModule *language_module = (void *)language->keyword_lex_fn; + + // Search for this store's instance of the language module. Also clear out any + // instances of languages that have been deleted. + bool exists = false; + for (unsigned i = 0; i < self->language_instances.size; i++) { + WasmLanguageId *id = self->language_instances.contents[i].language_id; + if (id->is_language_deleted) { + language_id_delete(id); + array_erase(&self->language_instances, i); + i--; + } else if (id == language_module->language_id) { + exists = true; + *index = i; + } + } + + // If the language module has not been instantiated in this store, then add + // it to this store. + if (!exists) { + *index = self->language_instances.size; + char *message; + wasmtime_instance_t instance; + int32_t language_address; + if (!ts_wasm_store__instantiate( + self, + language_module->module, + language_module->name, + &language_module->dylink_info, + &instance, + &language_address, + &message + )) { + ts_free(message); + return false; + } + + LanguageInWasmMemory wasm_language; + const uint8_t *memory = wasmtime_memory_data(context, &self->memory); + memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory)); + array_push(&self->language_instances, ((LanguageWasmInstance) { + .language_id = language_id_clone(language_module->language_id), + .instance = instance, + .external_states_address = wasm_language.external_scanner.states, + .lex_main_fn_index = wasm_language.lex_fn, + .lex_keyword_fn_index = wasm_language.keyword_lex_fn, + .scanner_create_fn_index = wasm_language.external_scanner.create, + .scanner_destroy_fn_index = wasm_language.external_scanner.destroy, + .scanner_serialize_fn_index = wasm_language.external_scanner.serialize, + .scanner_deserialize_fn_index = wasm_language.external_scanner.deserialize, + .scanner_scan_fn_index = wasm_language.external_scanner.scan, + })); + } + + return true; +} + +void ts_wasm_store_reset_heap(TSWasmStore *self) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + wasmtime_func_t func = { + self->function_table.store_id, + self->builtin_fn_indices.reset_heap + }; + wasm_trap_t *trap = NULL; + wasmtime_val_t args[1] = { + {.of.i32 = ts_wasm_store__heap_address(self), .kind = WASMTIME_I32}, + }; + + wasmtime_error_t *error = wasmtime_func_call(context, &func, args, 1, NULL, 0, &trap); + assert(!error); + assert(!trap); +} + +bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language) { + uint32_t instance_index; + if (!ts_wasm_store_add_language(self, 
language, &instance_index)) return false; + self->current_lexer = lexer; + self->current_instance = &self->language_instances.contents[instance_index]; + self->has_error = false; + ts_wasm_store_reset_heap(self); + return true; +} + +void ts_wasm_store_reset(TSWasmStore *self) { + self->current_lexer = NULL; + self->current_instance = NULL; + self->has_error = false; + ts_wasm_store_reset_heap(self); +} + +static void ts_wasm_store__call( + TSWasmStore *self, + int32_t function_index, + wasmtime_val_raw_t *args_and_results, + size_t args_and_results_len +) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + wasmtime_val_t value; + bool succeeded = wasmtime_table_get(context, &self->function_table, function_index, &value); + assert(succeeded); + assert(value.kind == WASMTIME_FUNCREF); + wasmtime_func_t func = value.of.funcref; + + wasm_trap_t *trap = NULL; + wasmtime_error_t *error = wasmtime_func_call_unchecked(context, &func, args_and_results, args_and_results_len, &trap); + if (error) { + // wasm_message_t message; + // wasmtime_error_message(error, &message); + // fprintf( + // stderr, + // "error in wasm module: %.*s\n", + // (int)message.size, message.data + // ); + wasmtime_error_delete(error); + self->has_error = true; + } else if (trap) { + // wasm_message_t message; + // wasm_trap_message(trap, &message); + // fprintf( + // stderr, + // "trap in wasm module: %.*s\n", + // (int)message.size, message.data + // ); + wasm_trap_delete(trap); + self->has_error = true; + } +} + +static bool ts_wasm_store__call_lex_function(TSWasmStore *self, unsigned function_index, TSStateId state) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); + memcpy( + &memory_data[self->lexer_address], + &self->current_lexer->lookahead, + sizeof(self->current_lexer->lookahead) + ); + + wasmtime_val_raw_t args[2] = { + {.i32 = self->lexer_address}, + {.i32 = state}, + }; + ts_wasm_store__call(self, function_index, args, 2); + if (self->has_error) return false; + bool result = args[0].i32; + + memcpy( + &self->current_lexer->lookahead, + &memory_data[self->lexer_address], + sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol) + ); + return result; +} + +bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { + return ts_wasm_store__call_lex_function( + self, + self->current_instance->lex_main_fn_index, + state + ); +} + +bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { + return ts_wasm_store__call_lex_function( + self, + self->current_instance->lex_keyword_fn_index, + state + ); +} + +uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { + wasmtime_val_raw_t args[1] = {{.i32 = 0}}; + ts_wasm_store__call(self, self->current_instance->scanner_create_fn_index, args, 1); + if (self->has_error) return 0; + return args[0].i32; +} + +void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address) { + if (self->current_instance) { + wasmtime_val_raw_t args[1] = {{.i32 = scanner_address}}; + ts_wasm_store__call(self, self->current_instance->scanner_destroy_fn_index, args, 1); + } +} + +bool ts_wasm_store_call_scanner_scan( + TSWasmStore *self, + uint32_t scanner_address, + uint32_t valid_tokens_ix +) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); + + memcpy( + &memory_data[self->lexer_address], + 
&self->current_lexer->lookahead, + sizeof(self->current_lexer->lookahead) + ); + + uint32_t valid_tokens_address = + self->current_instance->external_states_address + + (valid_tokens_ix * sizeof(bool)); + wasmtime_val_raw_t args[3] = { + {.i32 = scanner_address}, + {.i32 = self->lexer_address}, + {.i32 = valid_tokens_address} + }; + ts_wasm_store__call(self, self->current_instance->scanner_scan_fn_index, args, 3); + if (self->has_error) return false; + + memcpy( + &self->current_lexer->lookahead, + &memory_data[self->lexer_address], + sizeof(self->current_lexer->lookahead) + sizeof(self->current_lexer->result_symbol) + ); + return args[0].i32; +} + +uint32_t ts_wasm_store_call_scanner_serialize( + TSWasmStore *self, + uint32_t scanner_address, + char *buffer +) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); + uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); + + wasmtime_val_raw_t args[2] = { + {.i32 = scanner_address}, + {.i32 = serialization_buffer_address}, + }; + ts_wasm_store__call(self, self->current_instance->scanner_serialize_fn_index, args, 2); + if (self->has_error) return 0; + + uint32_t length = args[0].i32; + if (length > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + self->has_error = true; + return 0; + } + + if (length > 0) { + memcpy( + ((Lexer *)self->current_lexer)->debug_buffer, + &memory_data[serialization_buffer_address], + length + ); + } + return length; +} + +void ts_wasm_store_call_scanner_deserialize( + TSWasmStore *self, + uint32_t scanner_address, + const char *buffer, + unsigned length +) { + wasmtime_context_t *context = wasmtime_store_context(self->store); + uint8_t *memory_data = wasmtime_memory_data(context, &self->memory); + uint32_t serialization_buffer_address = ts_wasm_store__serialization_buffer_address(self); + + if (length > 0) { + memcpy( + &memory_data[serialization_buffer_address], + buffer, + length + ); + } + + wasmtime_val_raw_t args[3] = { + {.i32 = scanner_address}, + {.i32 = serialization_buffer_address}, + {.i32 = length}, + }; + ts_wasm_store__call(self, self->current_instance->scanner_deserialize_fn_index, args, 3); +} + +bool ts_wasm_store_has_error(const TSWasmStore *self) { + return self->has_error; +} + +bool ts_language_is_wasm(const TSLanguage *self) { + return self->lex_fn == ts_wasm_store__sentinel_lex_fn; +} + +static inline LanguageWasmModule *ts_language__wasm_module(const TSLanguage *self) { + return (LanguageWasmModule *)self->keyword_lex_fn; +} + +void ts_wasm_language_retain(const TSLanguage *self) { + LanguageWasmModule *module = ts_language__wasm_module(self); + assert(module->ref_count > 0); + atomic_inc(&module->ref_count); +} + +void ts_wasm_language_release(const TSLanguage *self) { + LanguageWasmModule *module = ts_language__wasm_module(self); + assert(module->ref_count > 0); + if (atomic_dec(&module->ref_count) == 0) { + // Update the language id to reflect that the language is deleted. This allows any wasm stores + // that hold wasm instances for this language to delete those instances. 
+ atomic_inc(&module->language_id->is_language_deleted); + language_id_delete(module->language_id); + + ts_free((void *)module->field_name_buffer); + ts_free((void *)module->symbol_name_buffer); + ts_free((void *)module->name); + wasmtime_module_delete(module->module); + ts_free(module); + + ts_free((void *)self->alias_map); + ts_free((void *)self->alias_sequences); + ts_free((void *)self->external_scanner.symbol_map); + ts_free((void *)self->field_map_entries); + ts_free((void *)self->field_map_slices); + ts_free((void *)self->field_names); + ts_free((void *)self->lex_modes); + ts_free((void *)self->parse_actions); + ts_free((void *)self->parse_table); + ts_free((void *)self->primary_state_ids); + ts_free((void *)self->public_symbol_map); + ts_free((void *)self->small_parse_table); + ts_free((void *)self->small_parse_table_map); + ts_free((void *)self->symbol_metadata); + ts_free((void *)self->symbol_names); + ts_free((void *)self); + } +} + +#else + +// If the WASM feature is not enabled, define dummy versions of all of the +// wasm-related functions. + +void ts_wasm_store_delete(TSWasmStore *self) { + (void)self; +} + +bool ts_wasm_store_start( + TSWasmStore *self, + TSLexer *lexer, + const TSLanguage *language +) { + (void)self; + (void)lexer; + (void)language; + return false; +} + +void ts_wasm_store_reset(TSWasmStore *self) { + (void)self; +} + +bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state) { + (void)self; + (void)state; + return false; +} + +bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state) { + (void)self; + (void)state; + return false; +} + +uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self) { + (void)self; + return 0; +} + +void ts_wasm_store_call_scanner_destroy( + TSWasmStore *self, + uint32_t scanner_address +) { + (void)self; + (void)scanner_address; +} + +bool ts_wasm_store_call_scanner_scan( + TSWasmStore *self, + uint32_t scanner_address, + uint32_t valid_tokens_ix +) { + (void)self; + (void)scanner_address; + (void)valid_tokens_ix; + return false; +} + +uint32_t ts_wasm_store_call_scanner_serialize( + TSWasmStore *self, + uint32_t scanner_address, + char *buffer +) { + (void)self; + (void)scanner_address; + (void)buffer; + return 0; +} + +void ts_wasm_store_call_scanner_deserialize( + TSWasmStore *self, + uint32_t scanner_address, + const char *buffer, + unsigned length +) { + (void)self; + (void)scanner_address; + (void)buffer; + (void)length; +} + +bool ts_wasm_store_has_error(const TSWasmStore *self) { + (void)self; + return false; +} + +bool ts_language_is_wasm(const TSLanguage *self) { + (void)self; + return false; +} + +void ts_wasm_language_retain(const TSLanguage *self) { + (void)self; +} + +void ts_wasm_language_release(const TSLanguage *self) { + (void)self; +} + +#endif diff --git a/lib/src/wasm_store.h b/lib/src/wasm_store.h new file mode 100644 index 0000000..212f30d --- /dev/null +++ b/lib/src/wasm_store.h @@ -0,0 +1,31 @@ +#ifndef TREE_SITTER_WASM_H_ +#define TREE_SITTER_WASM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "tree_sitter/api.h" +#include "./parser.h" + +bool ts_wasm_store_start(TSWasmStore *, TSLexer *, const TSLanguage *); +void ts_wasm_store_reset(TSWasmStore *); +bool ts_wasm_store_has_error(const TSWasmStore *); + +bool ts_wasm_store_call_lex_main(TSWasmStore *, TSStateId); +bool ts_wasm_store_call_lex_keyword(TSWasmStore *, TSStateId); + +uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *); +void ts_wasm_store_call_scanner_destroy(TSWasmStore *, uint32_t); +bool 
ts_wasm_store_call_scanner_scan(TSWasmStore *, uint32_t, uint32_t); +uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *, uint32_t, char *); +void ts_wasm_store_call_scanner_deserialize(TSWasmStore *, uint32_t, const char *, unsigned); + +void ts_wasm_language_retain(const TSLanguage *); +void ts_wasm_language_release(const TSLanguage *); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_WASM_H_ diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..4fd08b8 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,6 @@ +comment_width = 100 +format_code_in_doc_comments = true +format_macro_matchers = true +imports_granularity = "Crate" +group_imports = "StdExternalCrate" +wrap_comments = true diff --git a/script/benchmark.cmd b/script/benchmark.cmd index bbb2df5..fff4c88 100644 --- a/script/benchmark.cmd +++ b/script/benchmark.cmd @@ -1,3 +1,4 @@ @echo off cargo bench benchmark -p tree-sitter-cli +exit /b %errorlevel% diff --git a/script/build-fuzzers b/script/build-fuzzers index bff43c8..b54875e 100755 --- a/script/build-fuzzers +++ b/script/build-fuzzers @@ -6,14 +6,8 @@ if [[ "$(uname -s)" != Linux ]]; then exit 1 fi -if [[ -z "$LIB_FUZZER_PATH" ]]; then - echo "LIB_FUZZER_PATH not set" - exit 1 -fi - CC=${CC:-clang} CXX=${CXX:-clang++} -LINK=${LINK:-clang++} default_fuzz_flags="-fsanitize=fuzzer,address,undefined" @@ -21,7 +15,7 @@ CFLAGS=${CFLAGS:-"$default_fuzz_flags"} CXXFLAGS=${CXXFLAGS:-"$default_fuzz_flags"} export CFLAGS -make +make CC="$CC" CXX="$CXX" if [ -z "$@" ]; then languages=$(ls test/fixtures/grammars) @@ -29,7 +23,13 @@ else languages="$@" fi +mkdir -p test/fuzz/out + for lang in ${languages[@]}; do + # skip typescript + if [[ $lang == "typescript" ]]; then + continue + fi echo "Building $lang fuzzer..." lang_dir="test/fixtures/grammars/$lang" @@ -42,7 +42,7 @@ for lang in ${languages[@]}; do $CXX $CXXFLAGS -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.cc" -o "${lang_scanner}.o" objects+=("${lang_scanner}.o") elif [ -e "${lang_scanner}.c" ]; then - $CC $CFLAGS -std=c99 -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.c" -o "${lang_scanner}.o" + $CC $CFLAGS -std=c11 -g -O1 "-I${lang_dir}/src" -c "${lang_scanner}.c" -o "${lang_scanner}.o" objects+=("${lang_scanner}.o") fi @@ -54,7 +54,7 @@ for lang in ${languages[@]}; do highlights_filename="${lang_dir}/queries/highlights.scm" if [ -e "${highlights_filename}" ]; then ts_lang_query_filename="${lang}.scm" - cp "${highlights_filename}" "out/${ts_lang_query_filename}" + cp "${highlights_filename}" "test/fuzz/out/${ts_lang_query_filename}" else ts_lang_query_filename="" fi @@ -62,11 +62,16 @@ for lang in ${languages[@]}; do # FIXME: We should extract the grammar name from grammar.js. Use the name of # the directory instead. Also, the grammar name needs to be a valid C # identifier so replace any '-' characters - ts_lang="tree_sitter_$(echo $lang | tr -- - _)" + ts_lang="tree_sitter_$(echo "$lang" | tr -- - _)" $CXX $CXXFLAGS -std=c++11 -I lib/include -D TS_LANG="$ts_lang" -D TS_LANG_QUERY_FILENAME="\"${ts_lang_query_filename}\"" \ "test/fuzz/fuzzer.cc" "${objects[@]}" \ - libtree-sitter.a "$LIB_FUZZER_PATH" \ - -o "out/${lang}_fuzzer" + libtree-sitter.a \ + -o "test/fuzz/out/${lang}_fuzzer" - python test/fuzz/gen-dict.py "${lang_dir}/src/grammar.json" > "out/$lang.dict" + jq ' + [ .. + | if .type? == "STRING" or (.type? == "ALIAS" and .named? 
== false) then .value else empty end
+      | select(test("\\S") and length == utf8bytelength)
+      ] | unique | .[]
+      ' "${lang_dir}/src/grammar.json" | sort > "test/fuzz/out/$lang.dict"
 done
diff --git a/script/build-wasm b/script/build-wasm
index dc42895..c96677f 100755
--- a/script/build-wasm
+++ b/script/build-wasm
@@ -30,10 +30,11 @@ EOF
 set -e
 
 web_dir=lib/binding_web
+src_dir=lib/src
 emscripten_flags="-O3"
 minify_js=1
 force_docker=0
-emscripen_version=$(cat "$(dirname "$0")"/../cli/emscripten-version)
+emscripen_version=$(cat "$(dirname "$0")"/../cli/loader/emscripten-version)
 
 while [[ $# > 0 ]]; do
   case "$1" in
@@ -51,6 +52,10 @@
       force_docker=1
       ;;
 
+    -v|--verbose)
+      emscripten_flags="-s VERBOSE=1 -v $emscripten_flags"
+      ;;
+
     *)
       usage
       echo "Unrecognized argument '$1'"
@@ -60,21 +65,32 @@
   shift
 done
 
-emcc=
+emcc=""
+docker=""
 if which emcc > /dev/null && [[ "$force_docker" == "0" ]]; then
   emcc=emcc
 elif which docker > /dev/null; then
-  emcc="docker run \
+  # detect which one to use
+  docker=docker
+elif which podman > /dev/null; then
+  docker=podman
+fi
+
+if [ -z "$emcc" ] && [ -n "$docker" ]; then
+  export PODMAN_USERNS=keep-id
+  emcc="$docker run \
     --rm \
     -v $(pwd):/src:Z \
     -u $(id -u) \
     emscripten/emsdk:$emscripen_version \
     emcc"
-else
+fi
+
+if [ -z "$emcc" ]; then
   if [[ "$force_docker" == "1" ]]; then
-    echo 'You must have `docker` on your PATH to run this script with --docker'
+    echo 'You must have `docker` or `podman` on your PATH to run this script with --docker'
   else
-    echo 'You must have either `docker` or `emcc` on your PATH to run this script'
+    echo 'You must have either `docker`, `podman`, or `emcc` on your PATH to run this script'
   fi
   exit 1
 fi
@@ -83,24 +99,33 @@
 mkdir -p target/scratch
 
 runtime_methods='stringToUTF16','AsciiToString'
 
+# Remove quotes, add leading underscores, remove newlines, remove trailing comma.
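+# e.g. a symbols file containing
+#
+#   "malloc",
+#   "free",
+#
+# becomes `_malloc,_free`, the form emscripten's -s EXPORTED_FUNCTIONS expects.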
+EXPORTED_FUNCTIONS=$( \ + cat ${src_dir}/wasm/stdlib-symbols.txt ${web_dir}/exports.txt | \ + sed -e 's/"//g' | \ + sed -e 's/^/_/g' | \ + tr -d '\n"' | \ + sed -e 's/,$//' \ +) + # Use emscripten to generate `tree-sitter.js` and `tree-sitter.wasm` # in the `target/scratch` directory $emcc \ -s WASM=1 \ - -s TOTAL_MEMORY=33554432 \ + -s INITIAL_MEMORY=33554432 \ -s ALLOW_MEMORY_GROWTH=1 \ -s MAIN_MODULE=2 \ - -s NO_FILESYSTEM=1 \ + -s FILESYSTEM=0 \ -s NODEJS_CATCH_EXIT=0 \ -s NODEJS_CATCH_REJECTION=0 \ - -s EXPORTED_FUNCTIONS=@${web_dir}/exports.json \ + -s EXPORTED_FUNCTIONS=${EXPORTED_FUNCTIONS} \ -s EXPORTED_RUNTIME_METHODS=$runtime_methods \ $emscripten_flags \ -fno-exceptions \ - -std=c99 \ + -std=c11 \ -D 'fprintf(...)=' \ -D NDEBUG= \ - -I lib/src \ + -I ${src_dir} \ -I lib/include \ --js-library ${web_dir}/imports.js \ --pre-js ${web_dir}/prefix.js \ @@ -124,9 +149,9 @@ if [[ "$minify_js" == "1" ]]; then --mangle \ --keep-classnames \ -- target/scratch/tree-sitter.js \ - > $web_dir/tree-sitter.js + > ${web_dir}/tree-sitter.js else - cp target/scratch/tree-sitter.js $web_dir/tree-sitter.js + cp target/scratch/tree-sitter.js ${web_dir}/tree-sitter.js fi -mv target/scratch/tree-sitter.wasm $web_dir/tree-sitter.wasm +mv target/scratch/tree-sitter.wasm ${web_dir}/tree-sitter.wasm diff --git a/script/build-wasm-stdlib b/script/build-wasm-stdlib new file mode 100755 index 0000000..fccac96 --- /dev/null +++ b/script/build-wasm-stdlib @@ -0,0 +1,34 @@ +#!/bin/bash + +set -e + +# Remove quotes and commas +EXPORTED_FUNCTIONS=$( \ + cat lib/src/wasm/stdlib-symbols.txt | \ + tr -d ',"' \ +) + +EXPORT_FLAGS="" +for function in ${EXPORTED_FUNCTIONS}; do + EXPORT_FLAGS+=" -Wl,--export=${function}" +done + +target/wasi-sdk-21.0/bin/clang-17 \ + -o stdlib.wasm \ + -Os \ + -fPIC \ + -Wl,--no-entry \ + -Wl,--stack-first \ + -Wl,-z -Wl,stack-size=65536 \ + -Wl,--import-undefined \ + -Wl,--import-memory \ + -Wl,--import-table \ + -Wl,--strip-debug \ + -Wl,--export=reset_heap \ + -Wl,--export=__wasm_call_ctors \ + -Wl,--export=__stack_pointer \ + ${EXPORT_FLAGS} \ + lib/src/wasm/stdlib.c + +xxd -C -i stdlib.wasm > lib/src/wasm/wasm-stdlib.h +mv stdlib.wasm target/ diff --git a/script/cliff.toml b/script/cliff.toml new file mode 100644 index 0000000..9520411 --- /dev/null +++ b/script/cliff.toml @@ -0,0 +1,72 @@ +[changelog] +# changelog header +header = """ +# Changelog\n +""" +# template for the changelog body +# https://tera.netlify.app/docs/#introduction +body = """ +{% if version %}\ + ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} +{% else %}\ + ## [unreleased] +{% endif %}\ +{% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | striptags | upper_first }} + {% for commit in commits%}\ + {% if not commit.scope %}\ + - {{ commit.message | upper_first }}\ + {% if commit.github.pr_number %} (){%- endif %} + {% endif %}\ + {% endfor %}\ + {% for group, commits in commits | group_by(attribute="scope") %}\ + {% for commit in commits %}\ + - **{{commit.scope}}**: {{ commit.message | upper_first }}\ + {% if commit.github.pr_number %} (){%- endif %} + {% endfor %}\ + {% endfor %} +{% endfor %} +""" +# remove the leading and trailing whitespace from the template +trim = true + +[git] +# parse the commits based on https://www.conventionalcommits.org +conventional_commits = true +# filter out the commits that are not conventional +filter_unconventional = false +# process each line of a commit as an individual commit +split_commits = 
false +# regex for preprocessing the commit messages +commit_preprocessors = [ + # { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/neovim/neovim/issues/${2}))"}, +] +# regex for parsing and grouping commits +commit_parsers = [ + { message = "!:", group = "Breaking" }, + { message = "^feat", group = "Features" }, + { message = "^fix", group = "Bug Fixes" }, + { message = "^perf", group = "Performance" }, + { message = "^doc", group = "Documentation" }, + { message = "^refactor", group = "Refactor" }, + { message = "^test", group = "Testing" }, + { message = "^build", group = "Build System and CI" }, + { message = "^ci", group = "Build System and CI" }, + { message = ".*", group = "Other" }, +] +# filter out the commits that are not matched by commit parsers +filter_commits = false +# glob pattern for matching git tags +tag_pattern = "v[0-9]*" +# regex for skipping tags +skip_tags = "v0.1.0-beta.1" +# regex for ignoring tags +ignore_tags = "" +# sort the tags chronologically +date_order = false +# sort the commits inside sections by oldest/newest order +sort_commits = "oldest" + +[remote.github] +owner = "tree-sitter" +repo = "tree-sitter" diff --git a/script/fetch-emscripten b/script/fetch-emscripten index 6f35284..6188881 100755 --- a/script/fetch-emscripten +++ b/script/fetch-emscripten @@ -2,7 +2,7 @@ set -e -EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/emscripten-version") +EMSCRIPTEN_VERSION=$(cat "$(dirname "$0")/../cli/loader/emscripten-version") mkdir -p target EMSDK_DIR="./target/emsdk" @@ -28,7 +28,5 @@ EMSDK_DIR="./target/emsdk" ) >&2 ( - source "$EMSDK_DIR/emsdk_env.sh" > /dev/null - emcc --version >&2 - echo export $(env | egrep '^(PATH|EM.*)') + echo "source \"$EMSDK_DIR/emsdk_env.sh\"" ) diff --git a/script/fetch-fixtures b/script/fetch-fixtures index 8d21193..59af3f8 100755 --- a/script/fetch-fixtures +++ b/script/fetch-fixtures @@ -1,6 +1,6 @@ #!/usr/bin/env bash -GRAMMARS_DIR=$(dirname $0)/../test/fixtures/grammars +GRAMMARS_DIR=$(dirname "$0")/../test/fixtures/grammars fetch_grammar() { local grammar=$1 @@ -10,13 +10,13 @@ fetch_grammar() { echo "Updating ${grammar} grammar..." - if [ ! -d $grammar_dir ]; then - git clone $grammar_url $grammar_dir --depth=1 + if [ ! 
-d "$grammar_dir" ]; then + git clone "$grammar_url" "$grammar_dir" --depth=1 fi ( - cd $grammar_dir - git fetch origin $ref --depth=1 + cd "$grammar_dir" || exit + git fetch origin "$ref" --depth=1 git reset --hard FETCH_HEAD ) } @@ -28,7 +28,7 @@ fetch_grammar embedded-template master fetch_grammar go master fetch_grammar html master fetch_grammar java master -fetch_grammar javascript partial-order-precedences +fetch_grammar javascript master fetch_grammar jsdoc master fetch_grammar json master fetch_grammar php master diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd index 3dad731..32727b0 100644 --- a/script/fetch-fixtures.cmd +++ b/script/fetch-fixtures.cmd @@ -7,7 +7,7 @@ call:fetch_grammar embedded-template master call:fetch_grammar go master call:fetch_grammar html master call:fetch_grammar java master -call:fetch_grammar javascript partial-order-precedences +call:fetch_grammar javascript master call:fetch_grammar jsdoc master call:fetch_grammar json master call:fetch_grammar php master diff --git a/script/generate-bindings b/script/generate-bindings index 54abac0..659337c 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -2,23 +2,42 @@ output_path=lib/binding_rust/bindings.rs header_path='lib/include/tree_sitter/api.h' +no_derive_copy=( + TSInput + TSLanguage + TSLogger + TSLookaheadIterator + TSParser + TSTree + TSQuery + TSQueryCursor + TSQueryCapture + TSQueryMatch + TSQueryPredicateStep +) +no_copy=$(IFS='|'; echo "${no_derive_copy[*]}") + +file_version=$(head -n1 "$output_path" | cut -d' ' -f6) +tool_version=$(bindgen --version | cut -d' ' -f2) +higher_version=$(echo -e "${file_version}\n${tool_version}" | sort -V | tail -n1) + +if [ "$higher_version" != "$tool_version" ]; then + echo "Latest used bindgen version was $file_version" >&2 + echo "Currently installed bindgen CLI version is $tool_version" >&2 + echo >&2 + echo "It's needed to upgrade bindgen CLI first with \`cargo install bindgen-cli\`" >&2 + exit 1 +fi bindgen \ --no-layout-tests \ - --whitelist-type '^TS.*' \ - --whitelist-function '^ts_.*' \ + --allowlist-type '^TS.*' \ + --allowlist-function '^ts_.*' \ + --allowlist-var "^TREE_SITTER.*" \ --blocklist-type '^__.*' \ - --size_t-is-usize \ - $header_path > $output_path - -echo "" >> $output_path - -defines=( - TREE_SITTER_LANGUAGE_VERSION - TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION -) - -for define in ${defines[@]}; do - define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3) - echo "pub const $define: usize = $define_value;" >> $output_path -done + --no-prepend-enum-name \ + --no-copy "$no_copy" \ + $header_path \ + -- \ + -D TREE_SITTER_FEATURE_WASM \ + > $output_path diff --git a/script/generate-fixtures b/script/generate-fixtures index 2c3b178..b5d95f1 100755 --- a/script/generate-fixtures +++ b/script/generate-fixtures @@ -15,7 +15,7 @@ fi filter_grammar_name=$1 grammars_dir=${root_dir}/test/fixtures/grammars -grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules) +grammar_files=$(find "$grammars_dir" -name grammar.js | grep -v node_modules) while read -r grammar_file; do grammar_dir=$(dirname "$grammar_file") @@ -27,7 +27,7 @@ while read -r grammar_file; do echo "Regenerating ${grammar_name} parser" ( - cd $grammar_dir + cd "$grammar_dir" "$tree_sitter" generate src/grammar.json --no-bindings --abi=latest ) -done <<< "$grammar_files" +done <<<"$grammar_files" diff --git a/script/generate-fixtures-wasm b/script/generate-fixtures-wasm index 4bba56a..eba7470 100755 --- 
a/script/generate-fixtures-wasm
+++ b/script/generate-fixtures-wasm
@@ -21,7 +21,7 @@ fi
 filter_grammar_name=$1
 
 grammars_dir=${root_dir}/test/fixtures/grammars
-grammar_files=$(find $grammars_dir -name grammar.js | grep -v node_modules)
+grammar_files=$(find "$grammars_dir" -name grammar.js | grep -v node_modules)
 
 while read -r grammar_file; do
   grammar_dir=$(dirname "$grammar_file")
@@ -32,7 +32,5 @@ fi
 
   echo "Compiling ${grammar_name} parser to wasm"
-  "$tree_sitter" build-wasm $build_wasm_args $grammar_dir
-done <<< "$grammar_files"
-
-mv tree-sitter-*.wasm target/release/
+  "$tree_sitter" build --wasm $build_wasm_args -o target/release/tree-sitter-"${grammar_name}".wasm "$grammar_dir"
+done <<<"$grammar_files"
diff --git a/script/reproduce b/script/reproduce
deleted file mode 120000
index 1c28442..0000000
--- a/script/reproduce
+++ /dev/null
@@ -1 +0,0 @@
-run-fuzzer
\ No newline at end of file
diff --git a/script/reproduce b/script/reproduce
new file mode 100755
index 0000000..80b01af
--- /dev/null
+++ b/script/reproduce
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -eux
+
+root=$(dirname "$0")/..
+export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
+export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
+
+# check if CI env var exists
+
+if [ -z "${CI:-}" ]; then
+  declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" )
+else
+  declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-time=120 -timeout=10 -rss_limit_mb=2048" )
+fi
+
+if [ "$#" -lt 3 ]; then
+  echo "usage: $0 <language> (halt|recover) <testcase>"
+  exit 1
+fi
+
+lang="$1"
+shift
+mode="$1"
+shift
+testcase="$1"
+shift
+# Treat remainder of arguments as libFuzzer arguments
+
+"${root}/test/fuzz/out/${lang}_fuzzer" "${mode_config[$mode]}" -runs=1 "${testcase}" "$@"
diff --git a/script/run-fuzzer b/script/run-fuzzer
index ddd481d..d1e9631 100755
--- a/script/run-fuzzer
+++ b/script/run-fuzzer
@@ -6,52 +6,33 @@ root=$(dirname "$0")/..
 export ASAN_OPTIONS="quarantine_size_mb=10:detect_leaks=1:symbolize=1"
 export UBSAN="print_stacktrace=1:halt_on_error=1:symbolize=1"
 
-declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=256" ["recover"]="-timeout=10 -rss_limit_mb=256" )
-
-run_fuzzer() {
-  if [ "$#" -lt 2 ]; then
-    echo "usage: $0 <language> <halt|recover>"
-    exit 1
-  fi
-
-  lang="$1"
-  shift
-  mode="$1"
-  shift
-  # Treat remainder of arguments as libFuzzer arguments
-
-  # Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
-  results="${root}/out/fuzz-results/${lang}_${mode}"
-  mkdir -p "${results}"
-  cd "${results}"
-
-  # Create a corpus directory, so new discoveries are stored on disk. These will
-  # then be loaded on subsequent fuzzing runs
-  mkdir -p corpus
-
-  "../../${lang}_fuzzer_${mode}" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_${mode}_" -max_len=2048 ${mode_config[$mode]} "./corpus" "$@"
-}
-
-reproduce() {
-  if [ "$#" -lt 3 ]; then
-    echo "usage: $0 <language> (halt|recover) <testcase>"
-    exit 1
-  fi
-
-  lang="$1"
-  shift
-  mode="$1"
-  shift
-  testcase="$1"
-  shift
-  # Treat remainder of arguments as libFuzzer arguments
-
-  "${root}/out/${lang}_fuzzer_${mode}" ${mode_config[$mode]} -runs=1 "${testcase}" "$@"
-}
-
-script=$(basename "$0")
-if [ "$script" == "run-fuzzer" ]; then
-  run_fuzzer "$@"
-elif [ "$script" == "reproduce" ]; then
-  reproduce "$@"
+# check if CI env var exists
+
+if [ -z "${CI:-}" ]; then
+  declare -A mode_config=( ["halt"]="-timeout=1 -rss_limit_mb=2048" ["recover"]="-timeout=10 -rss_limit_mb=2048" )
+else
+  declare -A mode_config=( ["halt"]="-max_total_time=120 -timeout=1 -rss_limit_mb=2048" ["recover"]="-time=120 -timeout=10 -rss_limit_mb=2048" )
+fi
+
+if [ "$#" -lt 2 ]; then
+  echo "usage: $0 <language> <halt|recover>"
+  exit 1
 fi
+
+lang="$1"
+shift
+mode="$1"
+shift
+# Treat remainder of arguments as libFuzzer arguments
+
+# Fuzzing logs and testcases are always written to `pwd`, so `cd` there first
+results="${root}/test/fuzz/out/fuzz-results/${lang}"
+mkdir -p "${results}"
+cd "${results}"
+
+# Create a corpus directory, so new discoveries are stored on disk. These will
+# then be loaded on subsequent fuzzing runs
+mkdir -p corpus
+
+pwd
+"../../${lang}_fuzzer" "-dict=../../${lang}.dict" "-artifact_prefix=${lang}_" -max_len=2048 "${mode_config[$mode]}" "./corpus" "$@"
diff --git a/script/test b/script/test
index 619c190..a76c621 100755
--- a/script/test
+++ b/script/test
@@ -6,13 +6,13 @@
 function usage {
 	cat <<-EOF
 	USAGE
 
-	  $0 [-adDg] [-s SEED] [-l LANGUAGE] [-e EXAMPLE] [-t TRIAL]
+	  $0 [-adDg] [-s SEED] [-l LANGUAGE] [-e EXAMPLE]
 
 	OPTIONS
 
 	  -h  Print this message
 
-	  -a  Compile C code with the Clang static analyzer
+	  -a  Compile C code with the Clang address sanitizer
 
 	  -e  Run only the corpus tests whose name contain the given string
 
@@ -41,9 +41,17 @@ while getopts "adDghl:e:s:i:" option; do
   case ${option} in
     a)
-      export RUSTFLAGS="-Z sanitizer=address"
-      # Specify a `--target` explicitly. For some reason, this is required for
-      # address sanitizer support.
+      export CFLAGS="-fsanitize=undefined,address"
+
+      # When the Tree-sitter C library is compiled with the address sanitizer, the address sanitizer
+      # runtime library needs to be linked into the final test executable. When using Xcode clang,
+      # the Rust linker doesn't know where to find that library, so we need to specify linker flags directly.
+      runtime_dir=$(cc -print-runtime-dir)
+      if [[ $runtime_dir == */Xcode.app/* ]]; then
+        export RUSTFLAGS="-C link-arg=-L${runtime_dir} -C link-arg=-lclang_rt.asan_osx_dynamic -C link-arg=-Wl,-rpath,${runtime_dir}"
+      fi
+
+      # Specify a `--target` explicitly. This is required for address sanitizer support.
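+      # `rustup show active-toolchain` prints something like
+      # "stable-x86_64-unknown-linux-gnu (default)"; the regex below strips the
+      # channel prefix so that only the target triple is passed to `--target`.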
toolchain=$(rustup show active-toolchain) toolchain_regex='(stable|beta|nightly)-([_a-z0-9-]+).*' if [[ $toolchain =~ $toolchain_regex ]]; then @@ -52,7 +60,8 @@ while getopts "adDghl:e:s:i:" option; do else echo "Failed to parse toolchain '${toolchain}'" fi - test_flags="${test_flags} --target ${current_target}" + + test_flags+=" --target ${current_target}" ;; e) export TREE_SITTER_EXAMPLE=${OPTARG} diff --git a/script/test.cmd b/script/test.cmd index 8baad2d..4dc97ef 100644 --- a/script/test.cmd +++ b/script/test.cmd @@ -4,4 +4,7 @@ setlocal set RUST_TEST_THREADS=1 set RUST_BACKTRACE=full cargo test "%~1" +if %errorlevel% NEQ 0 ( + exit /b %errorlevel% +) endlocal diff --git a/script/version b/script/version deleted file mode 100755 index ce4f6b8..0000000 --- a/script/version +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env node - -const fs = require('fs'); -const path = require('path'); -const {execFileSync} = require('child_process'); - -const cliPath = path.join(__dirname, '..', 'cli'); -const npmPath = path.join(cliPath, 'npm'); -const cargoTomlPath = path.join(cliPath, 'Cargo.toml'); - -const npmMetadata = require(path.join(npmPath, 'package.json')); -const npmVersion = npmMetadata.version; - -const cargoMetadata = fs.readFileSync(cargoTomlPath, 'utf8') -const cargoVersionMatch = cargoMetadata.match(/version = "([^"\n]+)"/); -const cargoVersion = cargoVersionMatch[1]; - -if (npmVersion !== cargoVersion) { - console.error(`NPM version ${npmVersion} does not match Cargo version ${cargoVersion}`); - process.exit(1); -} - -const arg = process.argv[2]; - -if (!arg) { - console.log([ - `Usage: script/version major | minor | patch | `, - '', - 'Update the CLI version by the given increment or to the given', - 'version number, creating a commit and tag for the new version.', - '' - ].join('\n')) - process.exit(1); -} - -if (arg) { - // Check that working directory is clean - const diff = execFileSync( - 'git', - ['diff', '--stat'], - {encoding: 'utf8'} - ); - if (diff.length !== 0) { - console.error('There are uncommitted changes.'); - process.exit(1); - } - - const newVersion = execFileSync( - 'npm', - ['version', process.argv[2], '--git-tag-version=false'], - {cwd: npmPath, encoding: 'utf8'} - ).trim().replace(/^v/, ''); - const newCargoVersionLine = cargoVersionMatch[0].replace(cargoVersion, newVersion); - const newCargoMetadata = cargoMetadata.replace(cargoVersionMatch[0], newCargoVersionLine); - fs.writeFileSync(cargoTomlPath, newCargoMetadata, 'utf8'); - execFileSync('cargo', ['build'], {cwd: cliPath}); - execFileSync('git', ['commit', '-a', '-m', newVersion]); - execFileSync('git', ['tag', 'v' + newVersion]); - console.log(newVersion) -} else { - console.log(npmVersion); -} diff --git a/tags/Cargo.toml b/tags/Cargo.toml index 99d053e..65cf925 100644 --- a/tags/Cargo.toml +++ b/tags/Cargo.toml @@ -1,27 +1,26 @@ [package] name = "tree-sitter-tags" +version.workspace = true description = "Library for extracting tag information" -version = "0.20.2" authors = [ "Max Brunsfeld ", "Patrick Thomson ", ] -license = "MIT" +edition.workspace = true +rust-version.workspace = true readme = "README.md" -edition = "2018" +homepage.workspace = true +repository.workspace = true +license.workspace = true keywords = ["incremental", "parsing", "syntax", "tagging"] categories = ["parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter" -rust-version.workspace = true [lib] crate-type = ["lib", "staticlib"] [dependencies] -regex = "1" -memchr = "2.3" -thiserror = "1.0" 
+memchr.workspace = true +regex.workspace = true +thiserror.workspace = true -[dependencies.tree-sitter] -version = "0.20" -path = "../lib" +tree-sitter.workspace = true diff --git a/tags/README.md b/tags/README.md index 36efa49..d7daac7 100644 --- a/tags/README.md +++ b/tags/README.md @@ -1,4 +1,9 @@ -# `tree-sitter-tags` +# Tree-sitter Tags + +[![crates.io badge]][crates.io] + +[crates.io]: https://crates.io/crates/tree-sitter-tags +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-tags.svg?color=%23B48723 ### Usage diff --git a/tags/src/c_lib.rs b/tags/src/c_lib.rs index 088cc7b..6041642 100644 --- a/tags/src/c_lib.rs +++ b/tags/src/c_lib.rs @@ -1,12 +1,12 @@ -use super::{Error, TagsConfiguration, TagsContext}; -use std::collections::HashMap; -use std::ffi::CStr; -use std::os::raw::c_char; -use std::process::abort; -use std::sync::atomic::AtomicUsize; -use std::{fmt, slice, str}; +use std::{ + collections::HashMap, ffi::CStr, fmt, os::raw::c_char, process::abort, slice, str, + sync::atomic::AtomicUsize, +}; + use tree_sitter::Language; +use super::{Error, TagsConfiguration, TagsContext}; + const BUFFER_TAGS_RESERVE_CAPACITY: usize = 100; const BUFFER_DOCS_RESERVE_CAPACITY: usize = 1024; @@ -66,13 +66,29 @@ pub extern "C" fn ts_tagger_new() -> *mut TSTagger { })) } +/// Delete a [`TSTagger`]. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. #[no_mangle] -pub extern "C" fn ts_tagger_delete(this: *mut TSTagger) { - drop(unsafe { Box::from_raw(this) }) +pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) { + drop(Box::from_raw(this)); } +/// Add a language to a [`TSTagger`]. +/// +/// Returns a [`TSTagsError`] indicating whether the operation was successful or not. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance. +/// `scope_name` must be non-null and a valid pointer to a null-terminated string. +/// `tags_query` and `locals_query` must be non-null and valid pointers to strings. +/// +/// The caller must ensure that the lengths of `tags_query` and `locals_query` are correct. 
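For orientation, the unsafe `extern "C"` functions in this file are thin wrappers over the crate's safe Rust types. Below is a minimal sketch of the equivalent safe-Rust flow, assuming a hypothetical `tree_sitter_javascript` grammar crate and an illustrative tags query; only `TagsConfiguration`, `TagsContext`, `generate_tags`, and `syntax_type_name` come from this patch.

```rust
// Sketch only, not part of the patch: driving tree-sitter-tags from safe Rust
// rather than through the C API above. The grammar crate and the query text
// are illustrative assumptions.
use tree_sitter_tags::{TagsConfiguration, TagsContext};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical tags query: capture each function's name as @name and the
    // whole declaration as @definition.function.
    let tags_query = "(function_declaration name: (identifier) @name) @definition.function";

    // `TagsConfiguration::new(language, tags_query, locals_query)` as shown
    // later in this patch; an empty locals query is valid.
    let config = TagsConfiguration::new(
        tree_sitter_javascript::language(), // hypothetical grammar crate
        tags_query,
        "",
    )?;

    let mut context = TagsContext::new();
    // `generate_tags` yields an iterator of `Result<Tag, Error>` plus a flag
    // reporting whether the parse contained errors.
    let (tags, had_parse_error) = context.generate_tags(&config, b"function hello() {}", None)?;
    for tag in tags {
        let tag = tag?;
        println!(
            "{} at bytes {:?} (definition: {})",
            config.syntax_type_name(tag.syntax_type_id),
            tag.name_range,
            tag.is_definition,
        );
    }
    assert!(!had_parse_error);
    Ok(())
}
```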
#[no_mangle] -pub extern "C" fn ts_tagger_add_language( +pub unsafe extern "C" fn ts_tagger_add_language( this: *mut TSTagger, scope_name: *const c_char, language: Language, @@ -82,16 +98,18 @@ pub extern "C" fn ts_tagger_add_language( locals_query_len: u32, ) -> TSTagsError { let tagger = unwrap_mut_ptr(this); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; - let tags_query = unsafe { slice::from_raw_parts(tags_query, tags_query_len as usize) }; - let locals_query = unsafe { slice::from_raw_parts(locals_query, locals_query_len as usize) }; - let tags_query = match str::from_utf8(tags_query) { - Ok(e) => e, - Err(_) => return TSTagsError::InvalidUtf8, + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); + let tags_query = slice::from_raw_parts(tags_query, tags_query_len as usize); + let locals_query = if !locals_query.is_null() { + slice::from_raw_parts(locals_query, locals_query_len as usize) + } else { + &[] + }; + let Ok(tags_query) = str::from_utf8(tags_query) else { + return TSTagsError::InvalidUtf8; }; - let locals_query = match str::from_utf8(locals_query) { - Ok(e) => e, - Err(_) => return TSTagsError::InvalidUtf8, + let Ok(locals_query) = str::from_utf8(locals_query) else { + return TSTagsError::InvalidUtf8; }; match TagsConfiguration::new(language, tags_query, locals_query) { @@ -107,8 +125,19 @@ pub extern "C" fn ts_tagger_add_language( } } +/// Tags some source code. +/// +/// Returns a [`TSTagsError`] indicating whether the operation was successful or not. +/// +/// # Safety +/// +/// `this` must be a non-null valid pointer to a [`TSTagger`] instance. +/// `scope_name` must be a non-null valid pointer to a null-terminated string. +/// `source_code` must be a non-null valid pointer to a slice of bytes. +/// `output` must be a non-null valid pointer to a [`TSTagsBuffer`] instance. +/// `cancellation_flag` must be a non-null valid pointer to an [`AtomicUsize`] instance. #[no_mangle] -pub extern "C" fn ts_tagger_tag( +pub unsafe extern "C" fn ts_tagger_tag( this: *mut TSTagger, scope_name: *const c_char, source_code: *const u8, @@ -118,14 +147,14 @@ pub extern "C" fn ts_tagger_tag( ) -> TSTagsError { let tagger = unwrap_mut_ptr(this); let buffer = unwrap_mut_ptr(output); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); if let Some(config) = tagger.languages.get(scope_name) { shrink_and_clear(&mut buffer.tags, BUFFER_TAGS_RESERVE_CAPACITY); shrink_and_clear(&mut buffer.docs, BUFFER_DOCS_RESERVE_CAPACITY); - let source_code = unsafe { slice::from_raw_parts(source_code, source_code_len as usize) }; - let cancellation_flag = unsafe { cancellation_flag.as_ref() }; + let source_code = slice::from_raw_parts(source_code, source_code_len as usize); + let cancellation_flag = cancellation_flag.as_ref(); let tags = match buffer .context @@ -138,16 +167,13 @@ pub extern "C" fn ts_tagger_tag( Err(e) => { return match e { Error::InvalidLanguage => TSTagsError::InvalidLanguage, - Error::Cancelled => TSTagsError::Timeout, _ => TSTagsError::Timeout, } } }; for tag in tags { - let tag = if let Ok(tag) = tag { - tag - } else { + let Ok(tag) = tag else { buffer.tags.clear(); buffer.docs.clear(); return TSTagsError::Timeout; @@ -197,68 +223,122 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer { })) } +/// Delete a [`TSTagsBuffer`]. 
+/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { - drop(unsafe { Box::from_raw(this) }) +pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) { + drop(Box::from_raw(this)); } +/// Get the tags from a [`TSTagsBuffer`]. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] +/// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. #[no_mangle] -pub extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { - let buffer = unwrap_ptr(this); - buffer.tags.as_ptr() +pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag { + unwrap_ptr(this).tags.as_ptr() } +/// Get the number of tags in a [`TSTagsBuffer`]. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance. #[no_mangle] -pub extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { - let buffer = unwrap_ptr(this); - buffer.tags.len() as u32 +pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 { + unwrap_ptr(this).tags.len() as u32 } +/// Get the documentation strings from a [`TSTagsBuffer`]. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagsBuffer`] +/// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage. +/// +/// The returned pointer points to a C-style string. +/// To get the length of the string, use [`ts_tags_buffer_docs_len`]. #[no_mangle] -pub extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { - let buffer = unwrap_ptr(this); - buffer.docs.as_ptr() as *const c_char +pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char { + unwrap_ptr(this).docs.as_ptr().cast::<c_char>() } +/// Get the length of the documentation strings in a [`TSTagsBuffer`]. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { - let buffer = unwrap_ptr(this); - buffer.docs.len() as u32 +pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 { + unwrap_ptr(this).docs.len() as u32 } +/// Get whether or not a [`TSTagsBuffer`] contains any parse errors. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance created by +/// [`ts_tags_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { - let buffer = unwrap_ptr(this); - buffer.errors_present +pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool { + unwrap_ptr(this).errors_present } +/// Get the syntax kinds for a given scope name. +/// +/// Returns a pointer to a null-terminated array of null-terminated strings. +/// +/// # Safety +/// +/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance created by +/// [`ts_tagger_new`].
+/// `scope_name` must be non-null and a valid pointer to a null-terminated string. +/// `len` must be non-null and a valid pointer to a `u32`. +/// +/// The caller must ensure that the returned pointer is not used after the [`TSTagger`] +/// is deleted with [`ts_tagger_delete`], else the data will point to garbage. +/// +/// The returned pointer points to a C-style string array. #[no_mangle] -pub extern "C" fn ts_tagger_syntax_kinds_for_scope_name( +pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name( this: *mut TSTagger, scope_name: *const c_char, len: *mut u32, ) -> *const *const c_char { let tagger = unwrap_mut_ptr(this); - let scope_name = unsafe { unwrap(CStr::from_ptr(scope_name).to_str()) }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); let len = unwrap_mut_ptr(len); *len = 0; if let Some(config) = tagger.languages.get(scope_name) { *len = config.c_syntax_type_names.len() as u32; - return config.c_syntax_type_names.as_ptr() as *const *const c_char; + return config.c_syntax_type_names.as_ptr().cast::<*const c_char>(); } std::ptr::null() } -fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - unsafe { result.as_ref() }.unwrap_or_else(|| { +unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { + result.as_ref().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) } -fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - unsafe { result.as_mut() }.unwrap_or_else(|| { +unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { + result.as_mut().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) @@ -266,7 +346,7 @@ fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T { result.unwrap_or_else(|error| { - eprintln!("tree-sitter tag error: {}", error); + eprintln!("tree-sitter tag error: {error}"); abort(); }) } diff --git a/tags/src/lib.rs b/tags/src/lib.rs index 13499d8..d23491d 100644 --- a/tags/src/lib.rs +++ b/tags/src/lib.rs @@ -1,13 +1,20 @@ +#![doc = include_str!("../README.md")] + pub mod c_lib; +use std::{ + char, + collections::HashMap, + ffi::{CStr, CString}, + mem, + ops::Range, + os::raw::c_char, + str, + sync::atomic::{AtomicUsize, Ordering}, +}; + use memchr::memchr; use regex::Regex; -use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::ops::Range; -use std::os::raw::c_char; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::{char, mem, str}; use thiserror::Error; use tree_sitter::{ Language, LossyUtf8, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree, @@ -41,7 +48,7 @@ pub struct NamedCapture { } pub struct TagsContext { - parser: Parser, + pub parser: Parser, cursor: QueryCursor, } @@ -115,7 +122,7 @@ struct LineInfo { impl TagsConfiguration { pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result<Self, Error> { - let query = Query::new(language, &format!("{}{}", locals_query, tags_query))?; + let query = Query::new(&language, &format!("{locals_query}{tags_query}"))?; let tags_query_offset = locals_query.len(); let mut tags_pattern_index = 0; @@ -134,14 +141,13 @@ impl TagsConfiguration { let mut local_scope_capture_index = None; let mut local_definition_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { - match name.as_str() { - "" => continue, + match *name { "name" => name_capture_index = Some(i as u32), "ignore" => ignore_capture_index = Some(i as u32), "doc" => doc_capture_index = Some(i as u32), "local.scope" =>
local_scope_capture_index = Some(i as u32), "local.definition" => local_definition_capture_index = Some(i as u32), - "local.reference" => continue, + "local.reference" | "" => continue, _ => { let mut is_definition = false; @@ -151,7 +157,7 @@ impl TagsConfiguration { } else if name.starts_with("reference.") { name.trim_start_matches("reference.") } else { - return Err(Error::InvalidCapture(name.to_string())); + return Err(Error::InvalidCapture((*name).to_string())); }; if let Ok(cstr) = CString::new(kind) { @@ -198,7 +204,7 @@ impl TagsConfiguration { } if let Some(doc_capture_index) = doc_capture_index { for predicate in query.general_predicates(pattern_index) { - if predicate.args.get(0) + if predicate.args.first() == Some(&QueryPredicateArg::Capture(doc_capture_index)) { match (predicate.operator.as_ref(), predicate.args.get(1)) { @@ -214,11 +220,11 @@ impl TagsConfiguration { } } } - return Ok(info); + Ok(info) }) .collect::<Result<Vec<_>, Error>>()?; - Ok(TagsConfiguration { + Ok(Self { language, query, syntax_type_names, @@ -227,26 +233,37 @@ impl TagsConfiguration { doc_capture_index, name_capture_index, ignore_capture_index, - tags_pattern_index, local_scope_capture_index, local_definition_capture_index, + tags_pattern_index, pattern_info, }) } + #[must_use] pub fn syntax_type_name(&self, id: u32) -> &str { unsafe { - let cstr = - CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const c_char) - .to_bytes(); + let cstr = CStr::from_ptr( + self.syntax_type_names[id as usize] + .as_ptr() + .cast::<c_char>(), + ) + .to_bytes(); str::from_utf8(cstr).expect("syntax type name was not valid utf-8") } } } +impl Default for TagsContext { + fn default() -> Self { + Self::new() + } +} + impl TagsContext { + #[must_use] pub fn new() -> Self { - TagsContext { + Self { parser: Parser::new(), cursor: QueryCursor::new(), } @@ -263,14 +280,15 @@ impl TagsContext { cancellation_flag: Option<&'a AtomicUsize>, ) -> Result<(impl Iterator<Item = Result<Tag, Error>> + 'a, bool), Error> { self.parser - .set_language(config.language) + .set_language(&config.language) .map_err(|_| Error::InvalidLanguage)?; self.parser.reset(); unsafe { self.parser.set_cancellation_flag(cancellation_flag) }; let tree = self.parser.parse(source, None).ok_or(Error::Cancelled)?; - // The `matches` iterator borrows the `Tree`, which prevents it from being moved. - // But the tree is really just a pointer, so it's actually ok to move it. + // The `matches` iterator borrows the `Tree`, which prevents it from being + // moved. But the tree is really just a pointer, so it's actually ok to + // move it. let tree_ref = unsafe { mem::transmute::<_, &'static Tree>(&tree) }; let matches = self .cursor @@ -325,9 +343,8 @@ where let tag = self.tag_queue.remove(0).0; if tag.is_ignored() { continue; - } else { - return Some(Ok(tag)); } + return Some(Ok(tag)); } } @@ -445,16 +462,16 @@ where } } - // Generate a doc string from all of the doc nodes, applying any strip regexes. + // Generate a doc string from all of the doc nodes, applying any strip + // regexes. let mut docs = None; for doc_node in &doc_nodes[docs_start_index..]
{ if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()]) { - let content = if let Some(regex) = &pattern_info.doc_strip_regex { - regex.replace_all(content, "").to_string() - } else { - content.to_string() - }; + let content = pattern_info.doc_strip_regex.as_ref().map_or_else( + || content.to_string(), + |regex| regex.replace_all(content, "").to_string(), + ); match &mut docs { None => docs = Some(content), Some(d) => { @@ -469,9 +486,9 @@ where let range = rng.start.min(name_range.start)..rng.end.max(name_range.end); let span = name_node.start_position()..name_node.end_position(); - // Compute tag properties that depend on the text of the containing line. If the - // previous tag occurred on the same line, then reuse results from the previous tag. - let line_range; + // Compute tag properties that depend on the text of the containing line. If + // the previous tag occurred on the same line, then + // reuse results from the previous tag. let mut prev_utf16_column = 0; let mut prev_utf8_byte = name_range.start - span.start.column; let line_info = self.prev_line_info.as_ref().and_then(|info| { @@ -481,20 +498,20 @@ where None } }); - if let Some(line_info) = line_info { - line_range = line_info.line_range.clone(); + let line_range = if let Some(line_info) = line_info { if line_info.utf8_position.column <= span.start.column { prev_utf8_byte = line_info.utf8_byte; prev_utf16_column = line_info.utf16_column; } + line_info.line_range.clone() } else { - line_range = self::line_range( + self::line_range( self.source, name_range.start, span.start, MAX_LINE_LEN, - ); - } + ) + }; let utf16_start_column = prev_utf16_column + utf16_len(&self.source[prev_utf8_byte..name_range.start]); @@ -509,11 +526,11 @@ where line_range: line_range.clone(), }); tag = Tag { + range, + name_range, line_range, span, utf16_column_range, - range, - name_range, docs, is_definition, syntax_type_id, @@ -552,8 +569,9 @@ where } impl Tag { - fn ignored(name_range: Range<usize>) -> Self { - Tag { + #[must_use] + const fn ignored(name_range: Range<usize>) -> Self { + Self { name_range, line_range: 0..0, span: Point::new(0, 0)..Point::new(0, 0), @@ -565,7 +583,8 @@ impl Tag { } } - fn is_ignored(&self) -> bool { + #[must_use] + const fn is_ignored(&self) -> bool { self.range.start == usize::MAX } } diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index 97c75f0..d44400c 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -14,8 +14,8 @@ int main() { (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")) - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))))) + (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";")) + (expression_statement (call_expression (identifier) (argument_list (string_literal (string_content)))) (MISSING ";"))))) ============================================== Top-level declarations with missing semicolons @@ -53,7 +53,7 @@ int c() { (translation_unit (preproc_ifdef (identifier) - (linkage_specification (string_literal) (declaration_list + (linkage_specification (string_literal (string_content)) (declaration_list (preproc_call (preproc_directive)) (comment) (declaration (primitive_type) (identifier)) @@ -108,7 +108,7 @@ int main() { (function_declarator (identifier)
(parameter_list)) (compound_statement (declaration (primitive_type) (identifier)) - (ERROR (primitive_type) (UNEXPECTED '$'))))) + (ERROR (primitive_type) (ERROR) (identifier) (UNEXPECTED '@'))))) ========================================= Extra values in parenthesized expressions diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index 4359ae6..9b92e10 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -74,16 +74,16 @@ if ({a: 'b'} {c: 'd'}) { (program (if_statement (parenthesized_expression - (ERROR (object (pair (property_identifier) (string)))) - (object (pair (property_identifier) (string)))) + (ERROR (object (pair (property_identifier) (string (string_fragment))))) + (object (pair (property_identifier) (string (string_fragment))))) (statement_block (expression_statement (assignment_expression (identifier) - (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) + (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) (MISSING ";")) (expression_statement - (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))) + (function_expression (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))) =================================================== Extra tokens at the end of the file @@ -148,14 +148,14 @@ const h = `i ${j(k} l` (lexical_declaration (variable_declarator (identifier) - (template_string (template_substitution - (augmented_assignment_expression (identifier) (MISSING identifier)))))) + (template_string (string_fragment) (template_substitution + (augmented_assignment_expression (identifier) (MISSING identifier))) (string_fragment)))) (lexical_declaration (variable_declarator (identifier) - (template_string (template_substitution (call_expression + (template_string (string_fragment) (template_substitution (call_expression (identifier) - (arguments (identifier) (MISSING ")")))))))) + (arguments (identifier) (MISSING ")")))) (string_fragment))))) ========================================================= Long sequences of invalid tokens @@ -178,12 +178,12 @@ function main(x) { (expression_statement (call_expression (member_expression (identifier) (property_identifier)) - (arguments (string)))) + (arguments (string (string_fragment))))) (expression_statement (binary_expression (identifier) (ERROR) (call_expression (member_expression (identifier) (property_identifier)) - (arguments (string))))) + (arguments (string (string_fragment)))))) (return_statement (object))))) diff --git a/test/fixtures/error_corpus/json_errors.txt b/test/fixtures/error_corpus/json_errors.txt index 53ce94e..065fa88 100644 --- a/test/fixtures/error_corpus/json_errors.txt +++ b/test/fixtures/error_corpus/json_errors.txt @@ -6,7 +6,8 @@ top-level errors --- -(ERROR) +(document + (ERROR)) ========================================== unexpected tokens @@ -16,7 +17,9 @@ barf --- -(ERROR (UNEXPECTED 'b')) +(document + (ERROR + (UNEXPECTED 'b'))) ========================================== errors inside arrays @@ -26,10 +29,11 @@ errors inside arrays --- -(document (array - (number) - (ERROR) - (number))) +(document + (array + (number) + (ERROR) + (number))) ========================================== errors inside objects @@ -39,9 +43,14 @@ errors inside objects --- -(document (object - (pair (string (string_content)) 
(number)) - (ERROR (UNEXPECTED 'o')))) +(document + (object + (pair + (string + (string_content)) + (number)) + (ERROR + (UNEXPECTED 'o')))) ========================================== errors inside nested objects @@ -51,12 +60,23 @@ errors inside nested objects --- -(document (object - (pair (string (string_content)) (object - (pair (string (string_content)) (number)) - (ERROR (number)))) - (pair (string (string_content)) (number)) - (ERROR))) +(document + (object + (pair + (string + (string_content)) + (object + (pair + (string + (string_content)) + (number)) + (ERROR + (number)))) + (pair + (string + (string_content)) + (number)) + (ERROR))) =============================== incomplete tokens at EOF @@ -65,4 +85,6 @@ incomplete tokens at EOF nul --- -(ERROR (UNEXPECTED '\0')) +(document + (ERROR + (UNEXPECTED '\0'))) diff --git a/test/fixtures/error_corpus/python_errors.txt b/test/fixtures/error_corpus/python_errors.txt index bd3101a..d5b4a5d 100644 --- a/test/fixtures/error_corpus/python_errors.txt +++ b/test/fixtures/error_corpus/python_errors.txt @@ -90,7 +90,9 @@ def a(): (ERROR (identifier)) body: (block (expression_statement (string - string_content: (string_content)))))) + (string_start) + (string_content) + (string_end)))))) =========================================== incomplete definition in class definition diff --git a/test/fixtures/error_corpus/ruby_errors.txt b/test/fixtures/error_corpus/ruby_errors.txt index df16081..88e7ee3 100644 --- a/test/fixtures/error_corpus/ruby_errors.txt +++ b/test/fixtures/error_corpus/ruby_errors.txt @@ -1,10 +1,11 @@ ========================== -Heredocs with errors +Heredocs with errors 2 ========================== -joins(<<~SQL( +joins <<~SQL b SQL +) c --- @@ -12,8 +13,10 @@ c (program (call method: (identifier) - (ERROR (heredoc_beginning)) arguments: (argument_list - (heredoc_body (heredoc_content) (heredoc_end)) - (identifier) - (MISSING ")")))) + (heredoc_beginning))) + (heredoc_body + (heredoc_content) + (heredoc_end)) + (ERROR) + (identifier)) diff --git a/test/fixtures/test_grammars/epsilon_external_tokens/scanner.c b/test/fixtures/test_grammars/epsilon_external_tokens/scanner.c index 85bc7c6..d25c1ff 100644 --- a/test/fixtures/test_grammars/epsilon_external_tokens/scanner.c +++ b/test/fixtures/test_grammars/epsilon_external_tokens/scanner.c @@ -1,4 +1,4 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" enum TokenType { ZERO_WIDTH_TOKEN diff --git a/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c b/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c index eb999d1..187c675 100644 --- a/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c +++ b/test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c @@ -1,4 +1,9 @@ -#include "../external_and_internal_tokens/scanner.c" +#include "tree_sitter/parser.h" + +enum { + STRING, + LINE_BREAK +}; void *tree_sitter_external_and_internal_anonymous_tokens_external_scanner_create() { return NULL; } @@ -28,9 +33,38 @@ bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_scan( TSLexer *lexer, const bool *whitelist ) { - return tree_sitter_external_and_internal_tokens_external_scanner_scan( - payload, - lexer, - whitelist - ); + // If a line-break is a valid lookahead token, only skip spaces.
+ if (whitelist[LINE_BREAK]) { + while (lexer->lookahead == ' ' || lexer->lookahead == '\r') { + lexer->advance(lexer, true); + } + + if (lexer->lookahead == '\n') { + lexer->advance(lexer, false); + lexer->result_symbol = LINE_BREAK; + return true; + } + } + + // If a line-break is not a valid lookahead token, skip line breaks as well + // as spaces. + if (whitelist[STRING]) { + while (lexer->lookahead == ' ' || lexer->lookahead == '\r' || lexer->lookahead == '\n') { + lexer->advance(lexer, true); + } + + if (lexer->lookahead == '\'') { + lexer->advance(lexer, false); + + while (lexer->lookahead != '\'') { + lexer->advance(lexer, false); + } + + lexer->advance(lexer, false); + lexer->result_symbol = STRING; + return true; + } + } + + return false; } diff --git a/test/fixtures/test_grammars/external_and_internal_tokens/scanner.c b/test/fixtures/test_grammars/external_and_internal_tokens/scanner.c index 43a4bc9..55454f2 100644 --- a/test/fixtures/test_grammars/external_and_internal_tokens/scanner.c +++ b/test/fixtures/test_grammars/external_and_internal_tokens/scanner.c @@ -1,4 +1,4 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" enum { STRING, diff --git a/test/fixtures/test_grammars/external_extra_tokens/scanner.c b/test/fixtures/test_grammars/external_extra_tokens/scanner.c index ac6d840..95d5842 100644 --- a/test/fixtures/test_grammars/external_extra_tokens/scanner.c +++ b/test/fixtures/test_grammars/external_extra_tokens/scanner.c @@ -1,4 +1,4 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" enum { COMMENT, diff --git a/test/fixtures/test_grammars/external_tokens/scanner.c b/test/fixtures/test_grammars/external_tokens/scanner.c index 490100d..163ae51 100644 --- a/test/fixtures/test_grammars/external_tokens/scanner.c +++ b/test/fixtures/test_grammars/external_tokens/scanner.c @@ -1,4 +1,4 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" enum { percent_string, diff --git a/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c b/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c index a9e9873..3380bba 100644 --- a/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c +++ b/test/fixtures/test_grammars/external_unicode_column_alignment/scanner.c @@ -1,4 +1,5 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" + +#include #include diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt b/test/fixtures/test_grammars/get_col_should_hang_not_crash/corpus.txt new file mode 100644 index 0000000..e69de29 diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js b/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js new file mode 100644 index 0000000..83d57d2 --- /dev/null +++ b/test/fixtures/test_grammars/get_col_should_hang_not_crash/grammar.js @@ -0,0 +1,13 @@ +module.exports = grammar({ + name: 'get_col_should_hang_not_crash', + + externals: $ => [ + $.test, + ], + + rules: { + source_file: $ => seq( + $.test + ), + }, +}); diff --git a/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c b/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c new file mode 100644 index 0000000..000647a --- /dev/null +++ b/test/fixtures/test_grammars/get_col_should_hang_not_crash/scanner.c @@ -0,0 +1,17 @@ +#include "tree_sitter/parser.h" + +unsigned tree_sitter_get_col_should_hang_not_crash_external_scanner_serialize() { return 0; } + +void tree_sitter_get_col_should_hang_not_crash_external_scanner_deserialize() {} + +void
*tree_sitter_get_col_should_hang_not_crash_external_scanner_create() { return NULL; } + +void tree_sitter_get_col_should_hang_not_crash_external_scanner_destroy() {} + +bool tree_sitter_get_col_should_hang_not_crash_external_scanner_scan(void *payload, TSLexer *lexer, + const bool *valid_symbols) { + while (true) { + lexer->advance(lexer, false); + lexer->get_column(lexer); + } +} diff --git a/test/fixtures/test_grammars/inverted_external_token/scanner.c b/test/fixtures/test_grammars/inverted_external_token/scanner.c index 260994c..33fde6d 100644 --- a/test/fixtures/test_grammars/inverted_external_token/scanner.c +++ b/test/fixtures/test_grammars/inverted_external_token/scanner.c @@ -1,4 +1,4 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h" enum { LINE_BREAK diff --git a/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js b/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js new file mode 100644 index 0000000..4868dc8 --- /dev/null +++ b/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.js @@ -0,0 +1,31 @@ +module.exports = grammar({ + name: 'lexical_conflicts_due_to_state_merging', + + rules: { + expression: $ => choice( + $.conditional, + $.quotient, + $.regex, + $.number, + $.parenthesized, + ), + + conditional: $ => prec.left(1, seq( + 'if', + $.parenthesized, + $.expression + )), + + quotient: $ => prec.left(seq( + $.expression, + '/', + $.expression + )), + + regex: $ => /\/[^/\n]+\//, + + number: $ => /\d+/, + + parenthesized: $ => seq('(', $.expression, ')'), + }, +}); diff --git a/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json b/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json deleted file mode 100644 index 143d6f2..0000000 --- a/test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "name": "lexical_conflicts_due_to_state_merging", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "conditional"}, - {"type": "SYMBOL", "name": "regex"}, - {"type": "SYMBOL", "name": "quotient"}, - {"type": "SYMBOL", "name": "number"}, - {"type": "SYMBOL", "name": "parenthesized"} - ] - }, - - "conditional": { - "type": "PREC_LEFT", - "value": 1, - "content": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "if"}, - {"type": "SYMBOL", "name": "parenthesized"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - "quotient": { - "type": "PREC_LEFT", - "value": 0, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "/"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - "regex": { - "type": "PATTERN", - "value": "/[^/\n]+/" - }, - - "number": { - "type": "PATTERN", - "value": "\\d+" - }, - - "parenthesized": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "("}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ")"} - ] - } - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/named_precedences/grammar.js b/test/fixtures/test_grammars/named_precedences/grammar.js new file mode 100644 index 0000000..2132385 --- /dev/null +++ b/test/fixtures/test_grammars/named_precedences/grammar.js @@ -0,0 +1,48 @@ +module.exports = grammar({ + name: 'named_precedences', + + conflicts: $ => [ + [$.expression, $.type], + [$.expression, $.nested_type], + ], + 
precedences: $ => [ + [$.member_expression, "and", "or"], + [$.nested_type, "type_intersection", "type_union"], + ], + + rules: { + program: $ => repeat(choice( + $.expression_statement, + $.declaration_statement, + )), + + expression_statement: $ => seq($.expression, ';'), + + declaration_statement: $ => seq($.type, $.expression, ';'), + + expression: $ => choice( + $.member_expression, + $.binary_expression, + $.identifier, + ), + + member_expression: $ => seq($.expression, '.', $.identifier), + + binary_expression: $ => choice( + prec.left('or', seq($.expression, '||', $.expression)), + prec.left('and', seq($.expression, '&&', $.expression)), + ), + + type: $ => choice($.nested_type, $.binary_type, $.identifier), + + nested_type: $ => seq($.identifier, '.', $.identifier), + + binary_type: $ => choice( + prec.left('type_union', seq($.type, '||', $.type)), + prec.left('type_intersection', seq($.type, '&&', $.type)), + ), + + identifier: $ => /[a-z]\w+/, + }, +}); diff --git a/test/fixtures/test_grammars/named_precedences/grammar.json b/test/fixtures/test_grammars/named_precedences/grammar.json deleted file mode 100644 index ec679ca..0000000 --- a/test/fixtures/test_grammars/named_precedences/grammar.json +++ /dev/null @@ -1,159 +0,0 @@ -{ - "name": "named_precedences", - - "extras": [ - { - "type": "PATTERN", - "value": "\\s+" - } - ], - - "precedences": [ - [ - {"type": "SYMBOL", "name": "member_expression"}, - {"type": "STRING", "value": "and"}, - {"type": "STRING", "value": "or"} - ], - [ - {"type": "SYMBOL", "name": "nested_type"}, - {"type": "STRING", "value": "type_intersection"}, - {"type": "STRING", "value": "type_union"} - ] - ], - - "conflicts": [ - ["expression", "type"], - ["expression", "nested_type"] - ], - - "rules": { - "program": { - "type": "REPEAT", - "content": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "expression_statement"}, - {"type": "SYMBOL", "name": "declaration_statement"} - ] - } - }, - - "expression_statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ";"} - ] - }, - - "declaration_statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "type"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ";"} - ] - }, - - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "member_expression"}, - {"type": "SYMBOL", "name": "binary_expression"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "member_expression": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "."}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "binary_expression": { - "type": "CHOICE", - "members": [ - { - "type": "PREC_LEFT", - "value": "or", - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "||"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - { - "type": "PREC_LEFT", - "value": "and", - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "&&"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - } - ] - }, - - "type": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "nested_type"}, - {"type": "SYMBOL", "name": "binary_type"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "nested_type": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "STRING", "value": 
"."}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "binary_type": { - "type": "CHOICE", - "members": [ - { - "type": "PREC_LEFT", - "value": "type_union", - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "type"}, - {"type": "STRING", "value": "||"}, - {"type": "SYMBOL", "name": "type"} - ] - } - }, - { - "type": "PREC_LEFT", - "value": "type_intersection", - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "type"}, - {"type": "STRING", "value": "&&"}, - {"type": "SYMBOL", "name": "type"} - ] - } - } - ] - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-z]\\w+" - } - } -} diff --git a/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js b/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js new file mode 100644 index 0000000..3f30de5 --- /dev/null +++ b/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.js @@ -0,0 +1,15 @@ +module.exports = grammar({ + name: 'named_rule_aliased_as_anonymous', + + rules: { + a: $ => seq( + alias($.b, 'the-alias'), + $.c, + $.b, + ), + + b: _ => 'B', + + c: _ => 'C', + }, +}); diff --git a/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.json b/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.json deleted file mode 100644 index 2ff80dd..0000000 --- a/test/fixtures/test_grammars/named_rule_aliased_as_anonymous/grammar.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "named_rule_aliased_as_anonymous", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "a": { - "type": "SEQ", - "members": [ - { - "type": "ALIAS", - "value": "the-alias", - "named": false, - "content": {"type": "SYMBOL", "name": "b"} - }, - {"type": "SYMBOL", "name": "c"}, - {"type": "SYMBOL", "name": "b"} - ] - }, - - "b": { - "type": "STRING", - "value": "B" - }, - - "c": { - "type": "STRING", - "value": "C" - } - } -} diff --git a/test/fixtures/test_grammars/nested_inlined_rules/grammar.js b/test/fixtures/test_grammars/nested_inlined_rules/grammar.js new file mode 100644 index 0000000..7aaf601 --- /dev/null +++ b/test/fixtures/test_grammars/nested_inlined_rules/grammar.js @@ -0,0 +1,22 @@ +module.exports = grammar({ + name: 'nested_inlined_rules', + + inline: $ => [ + $.top_level_item, + $.statement, + ], + + rules: { + program: $ => repeat1($.top_level_item), + + top_level_item: $ => choice($.statement, '!'), + + statement: $ => choice($.expression_statement, $.return_statement), + + return_statement: $ => seq('return', $.number, ';'), + + expression_statement: $ => seq($.number, ';'), + + number: _ => /\d+/, + }, +}); diff --git a/test/fixtures/test_grammars/nested_inlined_rules/grammar.json b/test/fixtures/test_grammars/nested_inlined_rules/grammar.json deleted file mode 100644 index f240de1..0000000 --- a/test/fixtures/test_grammars/nested_inlined_rules/grammar.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "name": "nested_inlined_rules", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "inline": [ - "top_level_item", - "statement" - ], - - "rules": { - "program": { - "type": "REPEAT1", - "content": {"type": "SYMBOL", "name": "top_level_item"} - }, - - "top_level_item": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "statement"}, - {"type": "STRING", "value": "!"} - ] - }, - - "statement": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "expression_statement"}, - {"type": "SYMBOL", "name": "return_statement"} - ] - }, - - "return_statement": { - "type": "SEQ", 
- "members": [ - {"type": "STRING", "value": "return"}, - {"type": "SYMBOL", "name": "number"}, - {"type": "STRING", "value": ";"} - ] - }, - - "expression_statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "number"}, - {"type": "STRING", "value": ";"} - ] - }, - - "number": {"type": "PATTERN", "value": "\\d+"} - } -} diff --git a/test/fixtures/test_grammars/partially_resolved_conflict/grammar.js b/test/fixtures/test_grammars/partially_resolved_conflict/grammar.js new file mode 100644 index 0000000..cd0d1d6 --- /dev/null +++ b/test/fixtures/test_grammars/partially_resolved_conflict/grammar.js @@ -0,0 +1,19 @@ +module.exports = grammar({ + name: 'partially_resolved_conflict', + + rules: { + expression: $ => choice($.binary, $.identifier), + + unary_a: $ => prec(2, seq('!', $.expression)), + + unary_b: $ => prec(2, seq('!', $.expression)), + + binary: $ => seq( + choice($.unary_a, $.unary_b, $.expression), + '<', + $.expression, + ), + + identifier: _ => /[a-z]+/, + }, +}); diff --git a/test/fixtures/test_grammars/partially_resolved_conflict/grammar.json b/test/fixtures/test_grammars/partially_resolved_conflict/grammar.json deleted file mode 100644 index adca84d..0000000 --- a/test/fixtures/test_grammars/partially_resolved_conflict/grammar.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "name": "partially_resolved_conflict", - - "rules": { - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "binary"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "unary_a": { - "type": "PREC", - "value": 2, - "content": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "!"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - "unary_b": { - "type": "PREC", - "value": 2, - "content": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "!"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - "binary": { - "type": "SEQ", - "members": [ - { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "unary_a"}, - {"type": "SYMBOL", "name": "unary_b"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - {"type": "STRING", "value": "<"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-z]+" - } - } -} diff --git a/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js b/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js new file mode 100644 index 0000000..fbdb450 --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.js @@ -0,0 +1,17 @@ +module.exports = grammar({ + name: 'precedence_on_single_child_missing', + + rules: { + expression: $ => choice($.function_call, $.identifier), + + function_call: $ => prec.right(choice( + seq($.identifier, $.expression), + seq($.identifier, $.block), + seq($.identifier, $.expression, $.block), + )), + + block: $ => seq('{', $.expression, '}'), + + identifier: _ => /[a-zA-Z]+/, + }, +}); diff --git a/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json b/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json deleted file mode 100644 index 1985270..0000000 --- a/test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "name": "precedence_on_single_child_missing", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "function_call"}, - {"type": 
"SYMBOL", "name": "identifier"} - ] - }, - - "function_call": { - "type": "PREC_RIGHT", - "value": 0, - "content": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "block"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "SYMBOL", "name": "block"} - ] - } - ] - } - }, - - "block": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "{"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "}"} - ] - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-zA-Z]+" - } - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js b/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js new file mode 100644 index 0000000..798075d --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.js @@ -0,0 +1,17 @@ +module.exports = grammar({ + name: 'precedence_on_single_child_negative', + + rules: { + expression: $ => choice($.function_call, $.identifier), + + function_call: $ => prec.right(-1, choice( + seq($.identifier, $.expression), + seq($.identifier, $.block), + seq($.identifier, $.expression, $.block), + )), + + block: $ => seq('{', $.expression, '}'), + + identifier: _ => /[a-zA-Z]+/, + }, +}); diff --git a/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json b/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json deleted file mode 100644 index fc237f5..0000000 --- a/test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "name": "precedence_on_single_child_negative", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "function_call"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "function_call": { - "type": "PREC_RIGHT", - "value": -1, - "content": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "block"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "SYMBOL", "name": "block"} - ] - } - ] - } - }, - - "block": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "{"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "}"} - ] - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-zA-Z]+" - } - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js b/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js new file mode 100644 index 0000000..d2e57c3 --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.js @@ -0,0 +1,17 @@ +module.exports = grammar({ + name: 'precedence_on_single_child_positive', + + rules: { + expression: $ => choice($.function_call, $.identifier), + + function_call: $ => prec.right(1, choice( + 
seq($.identifier, $.expression), + seq($.identifier, $.block), + seq($.identifier, $.expression, $.block), + )), + + block: $ => seq('{', $.expression, '}'), + + identifier: _ => /[a-zA-Z]+/, + }, +}); diff --git a/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json b/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json deleted file mode 100644 index 7ffa73e..0000000 --- a/test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "name": "precedence_on_single_child_positive", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "function_call"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "function_call": { - "type": "PREC_RIGHT", - "value": 1, - "content": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "block"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "SYMBOL", "name": "block"} - ] - } - ] - } - }, - - "block": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "{"}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "}"} - ] - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-zA-Z]+" - } - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/precedence_on_subsequence/grammar.js b/test/fixtures/test_grammars/precedence_on_subsequence/grammar.js new file mode 100644 index 0000000..3a5bdef --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_subsequence/grammar.js @@ -0,0 +1,30 @@ +module.exports = grammar({ + name: 'precedence_on_subsequence', + + rules: { + expression: $ => prec.left(choice( + $.function_call, + $.identifier, + $.scope_resolution, + )), + + function_call: $ => choice( + seq($.identifier, $.expression), + prec(1, seq($.identifier, $.block)), + prec(-1, seq($.identifier, $.do_block)), + seq($.identifier, prec(1, seq($.expression, $.block))), + seq($.identifier, prec(-1, seq($.expression, $.do_block))), + ), + + scope_resolution: $ => prec.left(1, choice( + seq($.expression, '::', $.expression), + seq('::', $.expression), + )), + + block: _ => '{}', + + do_block: _ => 'do end', + + identifier: _ => /[a-zA-Z]+/, + }, +}); diff --git a/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json b/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json deleted file mode 100644 index d992793..0000000 --- a/test/fixtures/test_grammars/precedence_on_subsequence/grammar.json +++ /dev/null @@ -1,135 +0,0 @@ -{ - "name": "precedence_on_subsequence", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "expression": { - "type": "PREC_LEFT", - "value": 0, - "content": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "function_call"}, - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "scope_resolution"} - ] - } - }, - - "function_call": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - - { - "type": "PREC", - "value": 1, - "content": { - "type": "SEQ", - "members": [ 
{"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "block"} - ] - } - }, - - { - "type": "PREC", - "value": -1, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - {"type": "SYMBOL", "name": "do_block"} - ] - } - }, - - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - { - "type": "PREC", - "value": 1, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "SYMBOL", "name": "block"} - ] - } - } - ] - }, - - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "identifier"}, - { - "type": "PREC", - "value": -1, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "SYMBOL", "name": "do_block"} - ] - } - } - ] - } - ] - }, - - "scope_resolution": { - "type": "PREC_LEFT", - "value": 1, - "content": { - "type": "CHOICE", - "members": [ - { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "::"}, - {"type": "SYMBOL", "name": "expression"} - ] - }, - { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "::"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - ] - } - }, - - "block": { - "type": "STRING", - "value": "{}" - }, - - "do_block": { - "type": "STRING", - "value": "do end" - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-zA-Z]+" - } - } -} diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.js b/test/fixtures/test_grammars/precedence_on_token/grammar.js new file mode 100644 index 0000000..e56f2d8 --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_token/grammar.js @@ -0,0 +1,36 @@ +module.exports = grammar({ + name: 'precedence_on_token', + + extras: $ => [ + /\s/, + $.comment, + ], + + rules: { + program: $ => repeat(choice( + $.string, + $.regex, + $.identifier, + $.slash, + )), + + comment: _ => token(prec(1, /\/\/.*|\/\*[^*]*\*\//)), + + string: $ => seq( + '"', + repeat(choice( + token(prec(2, /[^\"\n\\]+/)), + $.escape_sequence, + )), + '"', + ), + + escape_sequence: _ => /\\./, + + regex: _ => /\/[^\/\n]+\/[a-z]*/, + + identifier: _ => /[a-z]\w*/, + + slash: _ => '/', + }, +}); diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json deleted file mode 100644 index 8ba7e69..0000000 --- a/test/fixtures/test_grammars/precedence_on_token/grammar.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "name": "precedence_on_token", - - "extras": [ - {"type": "SYMBOL", "name": "comment"}, - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "program": { - "type": "REPEAT", - "content": { - "type": "CHOICE", - "members": [ - { - "type": "SYMBOL", - "name": "string" - }, - { - "type": "SYMBOL", - "name": "regex" - }, - { - "type": "SYMBOL", - "name": "identifier" - }, - { - "type": "SYMBOL", - "name": "slash" - } - ] - } - }, - - "comment": { - "type": "TOKEN", - "content": { - "type": "PREC", - "value": 1, - "content": { - "type": "PATTERN", - "value": "//.*|/\\*[^*]*\\*/" - } - } - }, - - "string": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "\""}, - - { - "type": "REPEAT", - "content": { - "type": "CHOICE", - "members": [ - { - "type": "TOKEN", - "content": { - "type": "PREC", - "value": 2, - "content": { - "type": "PATTERN", - "value": "[^\"\n\\\\]+" - } - } - }, - { - "type": "SYMBOL", - "name": "escape_sequence" - } - ] - } - }, - - {"type": "STRING", "value": "\""} - ] - }, - - 
"escape_sequence": { - "type": "PATTERN", - "value": "\\\\." - }, - - "regex": { - "type": "PATTERN", - "value": "/[^/\n]+/[a-z]*" - }, - - "identifier": { - "type": "PATTERN", - "value": "[a-z]\\w*" - }, - - "slash": { - "type": "STRING", - "value": "/" - } - } -} diff --git a/test/fixtures/test_grammars/readme_grammar/grammar.js b/test/fixtures/test_grammars/readme_grammar/grammar.js new file mode 100644 index 0000000..9f3ce6d --- /dev/null +++ b/test/fixtures/test_grammars/readme_grammar/grammar.js @@ -0,0 +1,36 @@ +module.exports = grammar({ + name: 'readme_grammar', + + // Things that can appear anywhere in the language, like comments + // and whitespace, are expressed as 'extras'. + extras: $ => [ + /\s/, + $.comment, + ], + + rules: { + // The first rule listed in the grammar becomes the 'start rule'. + expression: $ => choice( + $.sum, + $.product, + $.number, + $.variable, + seq('(', $.expression, ')'), + ), + + // Tokens like '+' and '*' are described directly within the + // grammar's rules, as opposed to in a separate lexer description. + sum: $ => prec.left(1, seq($.expression, '+', $.expression)), + + // Ambiguities can be resolved at compile time by assigning precedence + // values to rule subtrees. + product: $ => prec.left(2, seq($.expression, '*', $.expression)), + + // Tokens can be specified using ECMAScript regexps. + number: _ => /\d+/, + + comment: _ => /#.*/, + + variable: _ => /[a-zA-Z]\w*/, + }, +}); diff --git a/test/fixtures/test_grammars/readme_grammar/grammar.json b/test/fixtures/test_grammars/readme_grammar/grammar.json deleted file mode 100644 index 91958fd..0000000 --- a/test/fixtures/test_grammars/readme_grammar/grammar.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "name": "readme_grammar", - - // Things that can appear anywhere in the language, like comments - // and whitespace, are expressed as 'extras'. - "extras": [ - {"type": "PATTERN", "value": "\\s"}, - {"type": "SYMBOL", "name": "comment"} - ], - - "rules": { - - // The first rule listed in the grammar becomes the 'start rule'. - "expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "sum"}, - {"type": "SYMBOL", "name": "product"}, - {"type": "SYMBOL", "name": "number"}, - {"type": "SYMBOL", "name": "variable"}, - { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "("}, - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": ")"} - ] - } - ] - }, - - // Tokens like '+' and '*' are described directly within the - // grammar's rules, as opposed to in a separate lexer description. - "sum": { - "type": "PREC_LEFT", - "value": 1, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "+"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - // Ambiguities can be resolved at compile time by assigning precedence - // values to rule subtrees. - "product": { - "type": "PREC_LEFT", - "value": 2, - "content": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "expression"}, - {"type": "STRING", "value": "*"}, - {"type": "SYMBOL", "name": "expression"} - ] - } - }, - - // Tokens can be specified using ECMAScript regexps. 
- "number": {"type": "PATTERN", "value": "\\d+"}, - "comment": {"type": "PATTERN", "value": "#.*"}, - "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"} - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/start_rule_is_blank/grammar.js b/test/fixtures/test_grammars/start_rule_is_blank/grammar.js new file mode 100644 index 0000000..b38e0de --- /dev/null +++ b/test/fixtures/test_grammars/start_rule_is_blank/grammar.js @@ -0,0 +1,7 @@ +module.exports = grammar({ + name: 'start_rule_is_blank', + + rules: { + first_rule: _ => blank(), + }, +}); diff --git a/test/fixtures/test_grammars/start_rule_is_blank/grammar.json b/test/fixtures/test_grammars/start_rule_is_blank/grammar.json deleted file mode 100644 index 94b6c6c..0000000 --- a/test/fixtures/test_grammars/start_rule_is_blank/grammar.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "start_rule_is_blank", - "rules": { - "first_rule": {"type": "BLANK"} - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/start_rule_is_token/grammar.js b/test/fixtures/test_grammars/start_rule_is_token/grammar.js new file mode 100644 index 0000000..f00433e --- /dev/null +++ b/test/fixtures/test_grammars/start_rule_is_token/grammar.js @@ -0,0 +1,7 @@ +module.exports = grammar({ + name: 'start_rule_is_token', + + rules: { + first_rule: _ => 'the-value', + }, +}); diff --git a/test/fixtures/test_grammars/start_rule_is_token/grammar.json b/test/fixtures/test_grammars/start_rule_is_token/grammar.json deleted file mode 100644 index 9b60c0d..0000000 --- a/test/fixtures/test_grammars/start_rule_is_token/grammar.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "start_rule_is_token", - "rules": { - "first_rule": {"type": "STRING", "value": "the-value"} - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/unicode_classes/grammar.js b/test/fixtures/test_grammars/unicode_classes/grammar.js new file mode 100644 index 0000000..25dcf13 --- /dev/null +++ b/test/fixtures/test_grammars/unicode_classes/grammar.js @@ -0,0 +1,20 @@ +module.exports = grammar({ + name: 'unicode_classes', + + rules: { + program: $ => repeat(choice( + $.lower, + $.upper, + $.math_sym, + $.letter_number, + )), + + lower: _ => /\p{Ll}\p{L}*/, + + upper: _ => /\p{Lu}\p{L}*/, + + math_sym: _ => /\p{Sm}+/, + + letter_number: _ => /\p{Letter_Number}/, + }, +}); diff --git a/test/fixtures/test_grammars/unicode_classes/grammar.json b/test/fixtures/test_grammars/unicode_classes/grammar.json deleted file mode 100644 index 7a36d0c..0000000 --- a/test/fixtures/test_grammars/unicode_classes/grammar.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "unicode_classes", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "program": { - "type": "REPEAT", - "content": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "lower"}, - {"type": "SYMBOL", "name": "upper"}, - {"type": "SYMBOL", "name": "math_sym"}, - {"type": "SYMBOL", "name": "letter_number"} - ] - } - }, - - "lower": { - "type": "PATTERN", - "value": "\\p{Ll}\\p{L}*" - }, - - "upper": { - "type": "PATTERN", - "value": "\\p{Lu}\\p{L}*" - }, - - "math_sym": { - "type": "PATTERN", - "value": "\\p{Sm}+" - }, - - "letter_number": { - "type": "PATTERN", - "value": "\\p{Letter_Number}" - } - } -} diff --git a/test/fixtures/test_grammars/unused_rules/grammar.js b/test/fixtures/test_grammars/unused_rules/grammar.js new file mode 100644 index 0000000..462243c --- /dev/null +++ b/test/fixtures/test_grammars/unused_rules/grammar.js @@ -0,0 +1,27 @@ +module.exports 
= grammar({ + name: 'unused_rules', + + rules: { + a: $ => seq($.d, $.h), + + b: _ => 'B', + + c: _ => 'C', + + d: $ => seq($.e, $.f), + + e: _ => 'E', + + f: _ => 'F', + + g: _ => 'G', + + h: $ => seq($.i, $.j), + + i: _ => 'I', + + j: _ => 'J', + + k: _ => 'K', + }, +}); diff --git a/test/fixtures/test_grammars/unused_rules/grammar.json b/test/fixtures/test_grammars/unused_rules/grammar.json deleted file mode 100644 index 7ed2a0d..0000000 --- a/test/fixtures/test_grammars/unused_rules/grammar.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "name": "unused_rules", - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "a": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "d"}, - {"type": "SYMBOL", "name": "h"} - ] - }, - - "b": { - "type": "STRING", - "value": "B" - }, - - "c": { - "type": "STRING", - "value": "C" - }, - - "d": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "e"}, - {"type": "SYMBOL", "name": "f"} - ] - }, - - "e": { - "type": "STRING", - "value": "E" - }, - - "f": { - "type": "STRING", - "value": "F" - }, - - "g": { - "type": "STRING", - "value": "G" - }, - - "h": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "i"}, - {"type": "SYMBOL", "name": "j"} - ] - }, - - "i": { - "type": "STRING", - "value": "I" - }, - - "j": { - "type": "STRING", - "value": "J" - }, - - "k": { - "type": "STRING", - "value": "K" - } - } -} \ No newline at end of file diff --git a/test/fixtures/test_grammars/uses_current_column/grammar.js b/test/fixtures/test_grammars/uses_current_column/grammar.js new file mode 100644 index 0000000..795ad59 --- /dev/null +++ b/test/fixtures/test_grammars/uses_current_column/grammar.js @@ -0,0 +1,36 @@ +module.exports = grammar({ + name: 'uses_current_column', + + externals: $ => [ + $._indent, + $._dedent, + $._newline, + ], + + rules: { + block: $ => repeat1($._statement), + + _statement: $ => seq($._expression, $._newline), + + _expression: $ => choice( + $.do_expression, + $.binary_expression, + $.identifier, + ), + + do_expression: $ => seq( + 'do', + $._indent, + $.block, + $._dedent, + ), + + binary_expression: $ => prec.left(1, seq( + $._expression, + choice('=', '+', '-'), + $._expression, + )), + + identifier: _ => /\w+/, + }, +}); diff --git a/test/fixtures/test_grammars/uses_current_column/grammar.json b/test/fixtures/test_grammars/uses_current_column/grammar.json deleted file mode 100644 index 90c740b..0000000 --- a/test/fixtures/test_grammars/uses_current_column/grammar.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "name": "uses_current_column", - - "externals": [ - {"type": "SYMBOL", "name": "_indent"}, - {"type": "SYMBOL", "name": "_dedent"}, - {"type": "SYMBOL", "name": "_newline"} - ], - - "extras": [ - {"type": "PATTERN", "value": "\\s"} - ], - - "rules": { - "block": { - "type": "REPEAT1", - "content": {"type": "SYMBOL", "name": "_statement"} - }, - - "_statement": { - "type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "_expression"}, - {"type": "SYMBOL", "name": "_newline"} - ] - }, - - "_expression": { - "type": "CHOICE", - "members": [ - {"type": "SYMBOL", "name": "do_expression"}, - {"type": "SYMBOL", "name": "binary_expression"}, - {"type": "SYMBOL", "name": "identifier"} - ] - }, - - "do_expression": { - "type": "SEQ", - "members": [ - {"type": "STRING", "value": "do"}, - {"type": "SYMBOL", "name": "_indent"}, - {"type": "SYMBOL", "name": "block"}, - {"type": "SYMBOL", "name": "_dedent"} - ] - }, - - "binary_expression": { - "type": "PREC_LEFT", - "value": 1, - "content": { - 
"type": "SEQ", - "members": [ - {"type": "SYMBOL", "name": "_expression"}, - { - "type": "CHOICE", - "members": [ - {"type": "STRING", "value": "="}, - {"type": "STRING", "value": "+"}, - {"type": "STRING", "value": "-"} - ] - }, - {"type": "SYMBOL", "name": "_expression"} - ] - } - }, - - "identifier": {"type": "PATTERN", "value": "\\w+"} - } -} diff --git a/test/fixtures/test_grammars/uses_current_column/scanner.c b/test/fixtures/test_grammars/uses_current_column/scanner.c index 62b1639..b2b5928 100644 --- a/test/fixtures/test_grammars/uses_current_column/scanner.c +++ b/test/fixtures/test_grammars/uses_current_column/scanner.c @@ -1,6 +1,7 @@ +#include "tree_sitter/parser.h" + #include #include -#include enum TokenType { INDENT, diff --git a/test/fuzz/README.md b/test/fuzz/README.md index a02d268..5adc1b0 100644 --- a/test/fuzz/README.md +++ b/test/fuzz/README.md @@ -34,7 +34,7 @@ The `run-fuzzer` script handles running an individual fuzzer with a sensible def which will log information to stdout. Failing testcases and a fuzz corpus will be saved to `fuzz-results/`. The most important extra `libFuzzer` options are `-jobs` and `-workers` which allow parallel fuzzing. This is can done with, e.g.: ``` -./script/run-fuzzer halt -jobs=32 -workers=32 +./script/run-fuzzer halt -jobs=32 -workers=32 ``` The testcase can be used to reproduce the crash by running: diff --git a/test/fuzz/fuzzer.cc b/test/fuzz/fuzzer.cc index ef80088..3b93374 100644 --- a/test/fuzz/fuzzer.cc +++ b/test/fuzz/fuzzer.cc @@ -47,7 +47,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { TSTree *tree = ts_parser_parse_string(parser, NULL, str, size); TSNode root_node = ts_tree_root_node(tree); - if (lang_query) { + if (lang_query != nullptr) { { TSQueryCursor *cursor = ts_query_cursor_new(); diff --git a/test/fuzz/gen-dict.py b/test/fuzz/gen-dict.py deleted file mode 100644 index f8cf834..0000000 --- a/test/fuzz/gen-dict.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import sys - -def find_literals(literals, node): - '''Recursively find STRING literals in the grammar definition''' - - if type(node) is dict: - if 'type' in node and node['type'] == 'STRING' and 'value' in node: - literals.add(node['value']) - - for key, value in node.iteritems(): - find_literals(literals, value) - - elif type(node) is list: - for item in node: - find_literals(literals, item) - -def main(): - '''Generate a libFuzzer / AFL dictionary from a tree-sitter grammar.json''' - with open(sys.argv[1]) as f: - grammar = json.load(f) - - literals = set() - find_literals(literals, grammar) - - for lit in sorted(literals): - if lit: - print '"%s"' % ''.join(['\\x%02x' % ord(b) for b in lit.encode('utf-8')]) - -if __name__ == '__main__': - main() diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 0000000..4f270db --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "xtask" +version = "0.1.0" +authors.workspace = true +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true +publish = false + +[dependencies] +git2.workspace = true +indoc.workspace = true +toml.workspace = true +semver.workspace = true +serde.workspace = true +serde_json.workspace = true diff --git a/xtask/src/bump.rs b/xtask/src/bump.rs new file mode 100644 index 0000000..02cda49 --- /dev/null +++ b/xtask/src/bump.rs @@ -0,0 +1,282 @@ +use std::{cmp::Ordering, path::Path}; + +use 
+use git2::{DiffOptions, Repository};
+use indoc::indoc;
+use semver::{BuildMetadata, Prerelease, Version};
+use toml::Value;
+
+pub fn get_latest_tag(repo: &Repository) -> Result<String, Box<dyn std::error::Error>> {
+    let mut tags = repo
+        .tag_names(None)?
+        .into_iter()
+        .filter_map(|tag| tag.map(String::from))
+        .filter_map(|tag| Version::parse(tag.strip_prefix('v').unwrap_or(&tag)).ok())
+        .collect::<Vec<_>>();
+
+    tags.sort_by(
+        |a, b| match (a.pre != Prerelease::EMPTY, b.pre != Prerelease::EMPTY) {
+            (true, true) | (false, false) => a.cmp(b),
+            (true, false) => Ordering::Less,
+            (false, true) => Ordering::Greater,
+        },
+    );
+
+    tags.last()
+        .map(std::string::ToString::to_string)
+        .ok_or_else(|| "No tags found".into())
+}
+
+pub fn bump_versions() -> Result<(), Box<dyn std::error::Error>> {
+    let repo = Repository::open(".")?;
+    let latest_tag = get_latest_tag(&repo)?;
+    let latest_tag_sha = repo.revparse_single(&format!("v{latest_tag}"))?.id();
+
+    let workspace_toml_version = fetch_workspace_version()?;
+
+    if latest_tag != workspace_toml_version {
+        eprintln!(
+            indoc! {"
+                Seems like the workspace Cargo.toml ({}) version does not match up with the latest git tag ({}).
+                Please ensure you don't change that yourself, this subcommand will handle this for you.
+            "},
+            workspace_toml_version, latest_tag
+        );
+        return Ok(());
+    }
+
+    let mut revwalk = repo.revwalk()?;
+    revwalk.push_range(format!("{latest_tag_sha}..HEAD").as_str())?;
+    let mut diff_options = DiffOptions::new();
+
+    let current_version = Version::parse(&latest_tag)?;
+    let mut should_increment_patch = false;
+    let mut should_increment_minor = false;
+
+    for oid in revwalk {
+        let oid = oid?;
+        let commit = repo.find_commit(oid)?;
+        let message = commit.message().unwrap();
+        let message = message.trim();
+
+        let diff = {
+            let parent = commit.parent(0).unwrap();
+            let parent_tree = parent.tree().unwrap();
+            let commit_tree = commit.tree().unwrap();
+            repo.diff_tree_to_tree(
+                Some(&parent_tree),
+                Some(&commit_tree),
+                Some(&mut diff_options),
+            )?
+        };
+
+        let mut source_code_changed = false;
+        diff.foreach(
+            &mut |delta, _| {
+                let path = delta.new_file().path().unwrap().to_str().unwrap();
+                if path.ends_with("rs") || path.ends_with("js") || path.ends_with('c') {
+                    source_code_changed = true;
+                }
+                true
+            },
+            None,
+            None,
+            None,
+        )?;
+
+        if source_code_changed {
+            should_increment_patch = true;
+
+            let Some((prefix, _)) = message.split_once(':') else {
+                continue;
+            };
+
+            let convention = if prefix.contains('(') {
+                prefix.split_once('(').unwrap().0
+            } else {
+                prefix
+            };
+
+            if ["feat", "feat!"].contains(&convention) || prefix.ends_with('!') {
+                should_increment_minor = true;
+            }
+        }
+    }
+
+    let mut version = current_version.clone();
+    if should_increment_minor {
+        version.minor += 1;
+        version.patch = 0;
+        version.pre = Prerelease::EMPTY;
+        version.build = BuildMetadata::EMPTY;
+    } else if should_increment_patch {
+        version.patch += 1;
+        version.pre = Prerelease::EMPTY;
+        version.build = BuildMetadata::EMPTY;
+    } else {
+        return Err(format!("No source code changed since {current_version}").into());
+    }
+
+    println!("Bumping from {current_version} to {version}");
+    update_crates(&current_version, &version)?;
+    update_makefile(&version)?;
+    update_npm(&version)?;
+    update_zig(&version)?;
+    tag_next_version(&repo, &version)?;
+
+    Ok(())
+}
+
+fn tag_next_version(
+    repo: &Repository,
+    next_version: &Version,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // first add the manifests
+
+    let mut index = repo.index()?;
+
+    for file in [
+        "Cargo.toml",
+        "Cargo.lock",
+        "cli/Cargo.toml",
+        "cli/config/Cargo.toml",
+        "cli/loader/Cargo.toml",
+        "lib/Cargo.toml",
+        "highlight/Cargo.toml",
+        "tags/Cargo.toml",
+        "cli/npm/package.json",
+        "lib/binding_web/package.json",
+        "Makefile",
+        "build.zig.zon",
+    ] {
+        index.add_path(Path::new(file))?;
+    }
+
+    index.write()?;
+
+    let tree_id = index.write_tree()?;
+    let tree = repo.find_tree(tree_id)?;
+    let signature = repo.signature()?;
+    let parent_commit = repo.revparse_single("HEAD")?.peel_to_commit()?;
+
+    let commit_id = repo.commit(
+        Some("HEAD"),
+        &signature,
+        &signature,
+        &format!("{next_version}"),
+        &tree,
+        &[&parent_commit],
+    )?;
+
+    let tag = repo.tag(
+        &format!("v{next_version}"),
+        &repo.find_object(commit_id, None)?,
+        &signature,
+        &format!("v{next_version}"),
+        false,
+    )?;
+
+    println!("Tagged commit {commit_id} with tag {tag}");
+
+    Ok(())
+}
+
+fn update_makefile(next_version: &Version) -> Result<(), Box<dyn std::error::Error>> {
+    let makefile = std::fs::read_to_string("Makefile")?;
+    let makefile = makefile
+        .lines()
+        .map(|line| {
+            if line.starts_with("VERSION") {
+                format!("VERSION := {next_version}")
+            } else {
+                line.to_string()
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("\n")
+        + "\n";
+
+    std::fs::write("Makefile", makefile)?;
+
+    Ok(())
+}
+
+fn update_crates(
+    current_version: &Version,
+    next_version: &Version,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let mut cmd = std::process::Command::new("cargo");
+    cmd.arg("workspaces").arg("version");
+
+    if next_version.minor > current_version.minor {
+        cmd.arg("minor");
+    } else {
+        cmd.arg("patch");
+    }
+
+    cmd.arg("--no-git-commit")
+        .arg("--yes")
+        .arg("--force")
+        .arg("*");
+
+    let status = cmd.status()?;
+
+    if !status.success() {
+        return Err("Failed to update crates".into());
+    }
+
+    Ok(())
+}
+
+fn update_npm(next_version: &Version) -> Result<(), Box<dyn std::error::Error>> {
+    for path in ["lib/binding_web/package.json", "cli/npm/package.json"] {
+        let package_json =
+            serde_json::from_str::<serde_json::Value>(&std::fs::read_to_string(path)?)?;
+
+        let mut package_json = package_json
+            .as_object()
.ok_or("Invalid package.json")? + .clone(); + package_json.insert( + "version".to_string(), + serde_json::Value::String(next_version.to_string()), + ); + + let package_json = serde_json::to_string_pretty(&package_json)? + "\n"; + + std::fs::write(path, package_json)?; + } + + Ok(()) +} + +fn update_zig(next_version: &Version) -> Result<(), Box> { + let zig = std::fs::read_to_string("build.zig.zon")?; + + let zig = zig + .lines() + .map(|line| { + if line.starts_with(" .version") { + format!(" .version = \"{next_version}\",") + } else { + line.to_string() + } + }) + .collect::>() + .join("\n") + + "\n"; + + std::fs::write("build.zig.zon", zig)?; + + Ok(()) +} + +/// read Cargo.toml and get the version +fn fetch_workspace_version() -> Result> { + let cargo_toml = toml::from_str::(&std::fs::read_to_string("Cargo.toml")?)?; + + Ok(cargo_toml["workspace"]["package"]["version"] + .as_str() + .unwrap() + .trim_matches('"') + .to_string()) +} diff --git a/xtask/src/main.rs b/xtask/src/main.rs new file mode 100644 index 0000000..80bbbdd --- /dev/null +++ b/xtask/src/main.rs @@ -0,0 +1,35 @@ +mod bump; + +use bump::bump_versions; + +fn print_help() { + println!( + " +xtask must specify a task to run. + +Usage: `cargo xtask ` + +Tasks: + bump-version +" + ); +} + +fn main() -> Result<(), Box> { + let Some(task) = std::env::args().nth(1) else { + print_help(); + std::process::exit(0); + }; + + match task.as_str() { + "bump-version" => { + bump_versions()?; + } + _ => { + println!("invalid task: {task}"); + std::process::exit(1); + } + } + + Ok(()) +}