Skip to content

Commit

Permalink
Improve performance of TANE-based algorithms
Browse files: browse the repository at this point in the history
Remove AUCC discovery artifacts, perform additional key pruning.
  • Loading branch information
iliya-b committed Oct 21, 2024
1 parent 79e9d79 commit c396254
Showing 1 changed file with 35 additions and 46 deletions.
81 changes: 35 additions & 46 deletions src/core/algorithms/fd/tane/tane_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,46 +43,40 @@ void TaneCommon::Prune(model::LatticeLevel* level) {

if (vertex->GetIsKeyCandidate()) {
double ucc_error = CalculateUccError(vertex->GetPositionListIndex(), relation_.get());
if (ucc_error <= max_ucc_error_) { // If a key candidate is an approx UCC

if (ucc_error == 0) { // if a (super) key
vertex->SetKeyCandidate(false);
if (ucc_error == 0) {
for (std::size_t rhs_index = vertex->GetRhsCandidates().find_first();
rhs_index != boost::dynamic_bitset<>::npos;
rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) {
Vertical rhs = static_cast<Vertical>(*schema->GetColumn((int)rhs_index));
if (!columns.Contains(rhs)) {
bool is_rhs_candidate = true;
for (auto const& column : columns.GetColumns()) {
Vertical sibling =
columns.Without(static_cast<Vertical>(*column)).Union(rhs);
auto sibling_vertex =
level->GetLatticeVertex(sibling.GetColumnIndices());
if (sibling_vertex == nullptr ||
!sibling_vertex->GetConstRhsCandidates()
[rhs.GetColumnIndices().find_first()]) {
is_rhs_candidate = false;
break;
}
// for each outer rhs: if there is a sibling s.t. it doesn't
// have this rhs, there is no FD: vertex->rhs
}
// Found fd: vertex->rhs => register it
if (is_rhs_candidate) {
RegisterAndCountFd(columns, schema->GetColumn(rhs_index));
for (std::size_t rhs_index = vertex->GetRhsCandidates().find_first();
rhs_index != boost::dynamic_bitset<>::npos;
rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) {
Vertical rhs = static_cast<Vertical>(*schema->GetColumn((int)rhs_index));
if (!columns.Contains(rhs)) {
bool is_rhs_candidate = true;
for (auto const& column : columns.GetColumns()) {
Vertical sibling =
columns.Without(static_cast<Vertical>(*column)).Union(rhs);
auto sibling_vertex =
level->GetLatticeVertex(sibling.GetColumnIndices());
if (sibling_vertex == nullptr ||
!sibling_vertex->GetConstRhsCandidates()[rhs.GetColumnIndices()
.find_first()]) {
is_rhs_candidate = false;
break;
}
// for each outer rhs: if there is a sibling s.t. it doesn't
// have this rhs, there is no FD: vertex->rhs
}
// Found fd: vertex->rhs => register it
if (is_rhs_candidate) {
RegisterAndCountFd(columns, schema->GetColumn(rhs_index));
}
}
key_vertices.push_back(vertex.get());
}
key_vertices.push_back(vertex.get());
}
}
// if we seek for exact FDs then SetInvalid
if (max_fd_error_ == 0 && max_ucc_error_ == 0) {
for (auto key_vertex : key_vertices) {
key_vertex->GetRhsCandidates() &= key_vertex->GetVertical().GetColumnIndices();
key_vertex->SetInvalid(true);
}
for (auto key_vertex : key_vertices) {
key_vertex->GetRhsCandidates() &= key_vertex->GetVertical().GetColumnIndices();
key_vertex->SetInvalid(true);
}
}
}
Expand Down Expand Up @@ -196,22 +190,17 @@ unsigned long long TaneCommon::ExecuteInternal() {
ColumnData const& column_data =
relation_->GetColumnData(column.GetColumnIndices().find_first());
double ucc_error = CalculateUccError(column_data.GetPositionListIndex(), relation_.get());
if (ucc_error <= max_ucc_error_) {
if (ucc_error == 0 && max_lhs_ != 0) {
vertex->SetKeyCandidate(false);
if (ucc_error == 0 && max_lhs_ != 0) {
for (unsigned long rhs_index = vertex->GetRhsCandidates().find_first();
rhs_index < vertex->GetRhsCandidates().size();
rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) {
if (rhs_index != column.GetColumnIndices().find_first()) {
RegisterAndCountFd(column, schema->GetColumn(rhs_index));
}
}
vertex->GetRhsCandidates() &= column.GetColumnIndices();
// set vertex invalid if we seek for exact dependencies
if (max_fd_error_ == 0 && max_ucc_error_ == 0) {
vertex->SetInvalid(true);
for (unsigned long rhs_index = vertex->GetRhsCandidates().find_first();
rhs_index < vertex->GetRhsCandidates().size();
rhs_index = vertex->GetRhsCandidates().find_next(rhs_index)) {
if (rhs_index != column.GetColumnIndices().find_first()) {
RegisterAndCountFd(column, schema->GetColumn(rhs_index));
}
}
vertex->GetRhsCandidates() &= column.GetColumnIndices();
vertex->SetInvalid(true);
}
}
levels.push_back(std::move(level1));
Expand Down

0 comments on commit c396254

Please sign in to comment.