Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

December 2024 fixes #503

Merged
merged 14 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
- added `-fdump-ir` to dump the IR entities to a file named `{file}.ark.ir`
- added 11 super instructions and their implementation to the VM
- support for the glob import syntax and symbol import syntax
- (experimental) `(string:setAt string index char)`
- modify list and return a copy `(string:setAt string index char)` (bound checked)
- added in place list mutation: `(@= list|string index new_value)`, `(@@= list|list<string> index1 index2 new_value|char)` (bound checked)
- compile time argument count check for `and` and `or`
- basic dead code elimination in the AST optimizer

### Changed
- instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument
Expand Down Expand Up @@ -93,7 +96,9 @@
- adding a `CALL_BUILTIN <builtin> <arg count>` super instruction
- fixed formatting of comments after the last symbol in an import node
- renamed `str:xyz` builtins to `string:xyz` for uniformity with the standard library
- `string:find` takes an optional third argument, startIndex (where to start the lookup from, default 0)
- `string:find` takes an optional third argument, startIndex (where to start the lookup from, default 0
- `list:setAt` can work with negative indexes, and is now bound checked
- re-enabled the AST optimizer, only used for the main `arkscript` executable (not enabled when embedding arkscript, so that one can grab variables from the VM)

### Removed
- removed unused `NodeType::Closure`
Expand Down
25 changes: 4 additions & 21 deletions include/Ark/Compiler/AST/Optimizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@
#include <functional>
#include <unordered_map>
#include <string>
#include <cinttypes>

#include <Ark/Compiler/Pass.hpp>
#include <Ark/Compiler/AST/Node.hpp>
#include <Ark/Exceptions.hpp>

namespace Ark::internal
{
Expand Down Expand Up @@ -56,33 +54,18 @@ namespace Ark::internal
std::unordered_map<std::string, unsigned> m_sym_appearances;

/**
* @brief Generate a fancy error message
* @brief Count the occurrences of each symbol in the AST, recursively, and prune if false/true, while false/true
*
* @param message
* @param node
*/
[[noreturn]] static void throwOptimizerError(const std::string& message, const Node& node);
void countAndPruneDeadCode(Node& node);

/**
* @brief Iterate over the AST and remove unused top level functions and constants
*
*/
void removeUnused();

/**
* @brief Run a given functor on the global scope symbols
*
* @param node
* @param func
*/
void runOnGlobalScopeVars(Node& node, const std::function<void(Node&, Node&, std::size_t)>& func);

/**
* @brief Count the occurrences of each symbol in the AST, recursively
* @brief Remove unused global variables from the AST
*
* @param node
*/
void countOccurences(Node& node);
void pruneUnusedGlobalVariables(Node& node);
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace Ark::internal
[[nodiscard]] std::optional<IR::Entity> compactEntities(const IR::Entity& first, const IR::Entity& second);
[[nodiscard]] std::optional<IR::Entity> compactEntities(const IR::Entity& first, const IR::Entity& second, const IR::Entity& third);

[[nodiscard]] bool isNumber(uint16_t id, double expected_number) const;
[[nodiscard]] bool isPositiveNumberInlinable(uint16_t id) const;
};
}

Expand Down
3 changes: 1 addition & 2 deletions include/Ark/Constants.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace Ark
// Compiler options
constexpr uint16_t FeatureImportSolver = 1 << 0;
constexpr uint16_t FeatureMacroProcessor = 1 << 1;
constexpr uint16_t FeatureASTOptimizer = 1 << 2;
constexpr uint16_t FeatureASTOptimizer = 1 << 2; ///< This is disabled so that embedding ArkScript does not prune nodes from the AST ; it is active in the `arkscript` executable
constexpr uint16_t FeatureIROptimizer = 1 << 3;
constexpr uint16_t FeatureNameResolver = 1 << 4;

Expand All @@ -60,7 +60,6 @@ namespace Ark
constexpr uint16_t DefaultFeatures =
FeatureImportSolver
| FeatureMacroProcessor
| FeatureASTOptimizer
| FeatureIROptimizer
| FeatureNameResolver;

Expand Down
2 changes: 2 additions & 0 deletions include/Ark/Files.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ namespace Ark::Utils
return std::vector<uint8_t> {};

const auto pos = ifs.tellg();
if (pos == 0)
return {};
// reserve appropriate number of bytes
std::vector<char> temp(static_cast<std::size_t>(pos));
ifs.seekg(0, std::ios::beg);
Expand Down
3 changes: 2 additions & 1 deletion include/CLI/JsonCompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ class JsonCompiler final
*
* @param debug the debug level
* @param lib_env list of path to the directories of the std lib
* @param features compiler features to enable. By default, none are active to be able to dump a file AST without any processing
*/
JsonCompiler(unsigned debug, const std::vector<std::filesystem::path>& lib_env);
JsonCompiler(unsigned debug, const std::vector<std::filesystem::path>& lib_env, uint16_t features = 0);

/**
* @brief Feed the different variables with information taken from the given source code file
Expand Down
11 changes: 10 additions & 1 deletion src/arkreactor/Builtins/List.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,16 @@ namespace Ark::internal::Builtins::List
types::Typedef("value", ValueType::Any) } } } },
n);

n[0].list()[static_cast<std::size_t>(n[1].number())] = n[2];
auto& list = n[0].list();

const std::size_t size = list.size();
long idx = static_cast<long>(n[1].number());
idx = idx < 0 ? static_cast<long>(size) + idx : idx;
if (std::cmp_greater_equal(idx, size))
throw std::runtime_error(
fmt::format("IndexError: list:setAt index ({}) out of range (list size: {})", idx, size));

list[static_cast<std::size_t>(idx)] = n[2];
return n[0];
}
}
11 changes: 10 additions & 1 deletion src/arkreactor/Builtins/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,16 @@ namespace Ark::internal::Builtins::String
types::Typedef("value", ValueType::String) } } } },
n);

n[0].stringRef()[static_cast<std::size_t>(n[1].number())] = n[2].string()[0];
auto& string = n[0].stringRef();

const std::size_t size = string.size();
long idx = static_cast<long>(n[1].number());
idx = idx < 0 ? static_cast<long>(size) + idx : idx;
if (std::cmp_greater_equal(idx, size))
throw std::runtime_error(
fmt::format("IndexError: string:setAt index ({}) out of range (string size: {})", idx, size));

string[static_cast<std::size_t>(idx)] = n[2].string()[0];
return n[0];
}
}
1 change: 0 additions & 1 deletion src/arkreactor/Compiler/AST/Node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ namespace Ark::internal
}

case NodeType::Unused:
os << "Unused:" << string();
break;
}
return os;
Expand Down
132 changes: 79 additions & 53 deletions src/arkreactor/Compiler/AST/Optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,86 +8,112 @@ namespace Ark::internal

void Optimizer::process(const Node& ast)
{
m_logger.traceStart("process");
// do not handle non-list nodes
if (ast.nodeType() != NodeType::List)
return;
m_ast = ast;
// todo: activate this removeUnused();

m_logger.traceStart("process");
countAndPruneDeadCode(m_ast);

// logic: remove piece of code with only 1 reference, if they aren't function calls
pruneUnusedGlobalVariables(m_ast);
m_logger.traceEnd();

m_logger.trace("AST after name pruning nodes");
if (m_logger.shouldTrace())
m_ast.debugPrint(std::cout) << '\n';
}

const Node& Optimizer::ast() const noexcept
{
return m_ast;
}

void Optimizer::throwOptimizerError(const std::string& message, const Node& node)
{
throw CodeError(message, node.filename(), node.line(), node.col(), node.repr());
}

void Optimizer::removeUnused()
void Optimizer::countAndPruneDeadCode(Node& node)
{
// do not handle non-list nodes
if (m_ast.nodeType() != NodeType::List)
return;
if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture)
{
auto [element, inserted] = m_sym_appearances.try_emplace(node.string(), 0);
if (!inserted)
element->second++;
}
else if (node.nodeType() == NodeType::Field)
{
for (auto& child : node.list())
countAndPruneDeadCode(child);
}
else if (node.nodeType() == NodeType::List)
{
// FIXME: very primitive removal of (if true/false ...) and (while false ...)
if (node.constList().size() > 1 && node.constList().front().nodeType() == NodeType::Keyword &&
(node.constList().front().keyword() == Keyword::If || node.constList().front().keyword() == Keyword::While))
{
const auto keyword = node.constList().front().keyword();
const auto condition = node.constList()[1];
const auto body = node.constList()[2];

runOnGlobalScopeVars(m_ast, [this](const Node& node, Node& parent [[maybe_unused]], int idx [[maybe_unused]]) {
m_sym_appearances[node.constList()[1].string()] = 0;
});
countOccurences(m_ast);
if (condition.nodeType() == NodeType::Symbol && condition.string() == "false")
{
// replace the node by an Unused, it is either a (while cond block) or (if cond then)
if (node.constList().size() == 3)
node = Node(NodeType::Unused);
else // it is a (if cond then else)
{
const auto back = node.constList().back();
node = back;
}
}
// only update '(if true then [else])' to 'then'
else if (keyword == Keyword::If && condition.nodeType() == NodeType::Symbol && condition.string() == "true")
node = body;

// logic: remove piece of code with only 1 reference, if they aren't function calls
runOnGlobalScopeVars(m_ast, [this](const Node& node, Node& parent, const std::size_t idx) {
std::string name = node.constList()[1].string();
// a variable was only declared and never used
if (m_sym_appearances.contains(name) && m_sym_appearances[name] == 1 && parent.list()[idx].list()[2].nodeType() != NodeType::List)
{
m_logger.debug("Removing unused variable '{}'", name);
// erase the node from the list
parent.list().erase(parent.list().begin() + static_cast<std::vector<Node>::difference_type>(idx));
// do not try to iterate on the child nodes as they do not exist anymore,
// we performed some optimization that squashed them.
if (!node.isListLike())
return;
}
});

// iterate over children
for (auto& child : node.list())
countAndPruneDeadCode(child);
}
else if (node.nodeType() == NodeType::Namespace)
countAndPruneDeadCode(*node.arkNamespace().ast);
}

void Optimizer::runOnGlobalScopeVars(Node& node, const std::function<void(Node&, Node&, std::size_t)>& func)
void Optimizer::pruneUnusedGlobalVariables(Node& node)
{
auto i = node.constList().size();
// iterate only on the first level, using reverse iterators to avoid copy-delete-move to nowhere
for (auto it = node.list().rbegin(); it != node.list().rend(); ++it)
for (auto& child : node.list())
{
i--;

if (it->nodeType() == NodeType::List && !it->constList().empty() &&
it->constList()[0].nodeType() == NodeType::Keyword)
if (child.nodeType() == NodeType::List && !child.constList().empty() &&
child.constList()[0].nodeType() == NodeType::Keyword)
{
Keyword kw = it->constList()[0].keyword();
const Keyword kw = child.constList()[0].keyword();

// eliminate nested begin blocks
if (kw == Keyword::Begin)
{
runOnGlobalScopeVars(*it, func);
pruneUnusedGlobalVariables(child);
// skip let/ mut detection
continue;
}
// check if it's a let/mut declaration

// check if it's a let/mut declaration and perform removal
if (kw == Keyword::Let || kw == Keyword::Mut)
func(*it, node, i);
{
const std::string name = child.constList()[1].string();
// a variable was only declared and never used
if (m_sym_appearances.contains(name) && m_sym_appearances[name] < 1)
{
m_logger.debug("Removing unused variable '{}'", name);
// erase the node by turning it to an Unused node
child = Node(NodeType::Unused);
}
}
}
}
}

void Optimizer::countOccurences(Node& node)
{
if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture)
{
// check if it's the name of something declared in global scope
if (const auto name = node.string(); m_sym_appearances.contains(name))
m_sym_appearances[name]++;
}
else if (node.nodeType() == NodeType::List)
{
// iterate over children
for (std::size_t i = 0, end = node.constList().size(); i != end; ++i)
countOccurences(node.list()[i]);
else if (child.nodeType() == NodeType::Namespace)
pruneUnusedGlobalVariables(*child.arkNamespace().ast);
}
}
}
23 changes: 16 additions & 7 deletions src/arkreactor/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ namespace Ark::internal
// namespace nodes
else if (x.nodeType() == NodeType::Namespace)
compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal, var_name);
else if (x.nodeType() == NodeType::Unused)
{
// do nothing, explicitly
}
// empty code block should be nil
else if (x.constList().empty())
{
Expand Down Expand Up @@ -248,9 +252,9 @@ namespace Ark::internal
if (inst <= POP && std::cmp_greater(argc, std::numeric_limits<uint16_t>::max()))
throwCompilerError(fmt::format("Too many arguments ({}), exceeds 65'535", argc), x);
if (argc != 3 && inst == SET_AT_INDEX)
throwCompilerError(fmt::format("Expected 3 arguments for {}, got {}", name, argc), c0);
throwCompilerError(fmt::format("Expected 3 arguments (list, index, value) for {}, got {}", name, argc), c0);
if (argc != 4 && inst == SET_AT_2_INDEX)
throwCompilerError(fmt::format("Expected 4 arguments for {}, got {}", name, argc), c0);
throwCompilerError(fmt::format("Expected 4 arguments (list, y, x, value) for {}, got {}", name, argc), c0);

// compile arguments in reverse order
for (std::size_t i = x.constList().size() - 1u; i > 0; --i)
Expand Down Expand Up @@ -450,6 +454,13 @@ namespace Ark::internal
if (maybe_shortcircuit.has_value())
{
// short circuit implementation
if (x.constList().size() < 3)
throwCompilerError(
fmt::format(
"Expected at least 2 arguments while compiling '{}', got {}",
node.string(),
x.constList().size() - 1),
x);

compileExpression(x.constList()[1], p, false, false);
page(p).emplace_back(DUP);
Expand Down Expand Up @@ -558,9 +569,7 @@ namespace Ark::internal
page(p).emplace_back(op);
}
else if (exp_count <= 1)
{
throwCompilerError(fmt::format("Operator needs two arguments, but was called with {}", exp_count), x.constList()[0]);
}

// need to check we didn't push the (op A B C D...) things for operators not supporting it
if (exp_count > 2)
Expand All @@ -578,9 +587,9 @@ namespace Ark::internal
default:
throwCompilerError(
fmt::format(
"can not create a chained expression (of length {}) for operator `{}'. You most likely forgot a `)'.",
exp_count,
Language::operators[static_cast<std::size_t>(op - FIRST_OPERATOR)]),
"`{}' requires 2 arguments, but got {}.",
Language::operators[static_cast<std::size_t>(op - FIRST_OPERATOR)],
exp_count),
x);
}
}
Expand Down
Loading
Loading