From 8a314db8dc9db2c617226cd3fafd2944749a1c93 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 22 Mar 2024 15:39:52 +0100 Subject: [PATCH 01/11] Bug fix --- src/map/if/acd/acd66.hpp | 2 +- src/map/if/acd/acd666.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 5891f7b90..aea9752e6 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -1024,7 +1024,7 @@ class acd66_impl if ( support_vars2 > support_vars1 ) { f = isets0[0] | isets1[1]; - std::swap( fs_fun[3], fs_fun[4] ); + std::swap( fs_fun[2], fs_fun[3] ); bs_support_size = support_vars1; } diff --git a/src/map/if/acd/acd666.hpp b/src/map/if/acd/acd666.hpp index 72ae68ec4..1086af0a3 100644 --- a/src/map/if/acd/acd666.hpp +++ b/src/map/if/acd/acd666.hpp @@ -646,7 +646,7 @@ class acd666_impl { f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; } - std::swap( fs_fun[3], fs_fun[4] ); + std::swap( fs_fun[2], fs_fun[3] ); rm_support_size = support_vars1; } @@ -705,7 +705,7 @@ class acd666_impl if ( support_vars2 > support_vars1 ) { f = isets0[0] | isets1[1]; - std::swap( fs_fun[3], fs_fun[4] ); + std::swap( fs_fun[2], fs_fun[3] ); support_sizes[0] = support_vars1; } From 6aacf524aa1fbb1fa010cdf07fa0fbea97b1137b Mon Sep 17 00:00:00 2001 From: aletempiac Date: Fri, 22 Mar 2024 19:19:35 +0100 Subject: [PATCH 02/11] Performance improvement and fixes --- src/map/if/acd/ac_decomposition.hpp | 54 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index d573da2cd..0ad1e34c2 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -118,11 +118,12 @@ class ac_decomposition_impl } if ( late_arriving > ps.max_free_set_vars ) { - ps.max_free_set_vars = late_arriving; + return -1; /* on average avoiding this computation leads to better quality */ + // ps.max_free_set_vars = late_arriving; } /* return a high cost if too many late arriving variables */ - if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars ) + if ( late_arriving > ps.lut_size - 1 ) { return -1; } @@ -632,13 +633,10 @@ class ac_decomposition_impl uint32_t k = 0; for ( uint32_t j = 0; j < num_vars - best_free_set; ++j ) { - if ( !kitty::has_var( tt, j ) ) - continue; - if ( !kitty::has_var( tt, care, j ) ) { /* fix truth table */ - adjust_truth_table_on_dc( tt, care, j ); + adjust_truth_table_on_dc( tt, care, tt.num_vars(), j ); continue; } @@ -788,7 +786,14 @@ class ac_decomposition_impl } } support_minimization_encodings = std::vector>( num_combs ); - generate_support_minimization_encodings_rec( 0, 0, 0, count ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); + } + else if ( best_multiplicity > 8 ) + { + /* combinations are 2^(mu - 1) */ + num_combs = 1u << ( best_multiplicity - 1 ); + support_minimization_encodings = std::vector>( num_combs ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); } else { @@ -798,18 +803,18 @@ class ac_decomposition_impl num_combs = ( num_combs << 1 ) + num_combs; } support_minimization_encodings = std::vector>( num_combs ); - generate_support_minimization_encodings_rec( 0, 0, 0, count ); + generate_support_minimization_encodings_rec( 0, 0, 0, count ); } assert( count == num_combs ); } - template + template void generate_support_minimization_encodings_rec( uint32_t onset, uint32_t offset, uint32_t var, uint32_t& count ) { if ( var == best_multiplicity ) { - if ( !enable_dcset ) + if ( equal_size_partition ) { /* sets must be equally populated */ if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) ) @@ -827,12 +832,12 @@ class ac_decomposition_impl /* var in DCSET */ if ( enable_dcset ) { - generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); } /* move var in ONSET */ onset |= 1 << var; - generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); onset &= ~( 1 << var ); /* remove symmetries */ @@ -843,7 +848,7 @@ class ac_decomposition_impl /* move var in OFFSET */ offset |= 1 << var; - generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); + generate_support_minimization_encodings_rec( onset, offset, var + 1, count ); offset &= ~( 1 << var ); } @@ -1042,6 +1047,13 @@ class ac_decomposition_impl for ( uint32_t j = 0; j < iset_support; ++j ) { cost += has_var_support( tt, care, iset_support, j ) ? 1 : 0; + // if ( !has_var_support( tt, care, iset_support, j ) ) + // { + // /* adjust truth table and care set */ + // adjust_truth_table_on_dc( tt, care, iset_support, j ); + // continue; + // } + // ++cost; } /* discard solutions with support over LUT size */ @@ -1186,7 +1198,7 @@ class ac_decomposition_impl return res; } - bool covering_improve( std::vector& matrix, std::array& solution ) + bool covering_improve( std::vector const& matrix, std::array& solution ) { /* performs one iteration of local search */ uint32_t best_cost = 0, local_cost = 0; @@ -1233,21 +1245,23 @@ class ac_decomposition_impl return improved; } - void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index ) + void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t real_num_vars, uint32_t var_index ) { - assert( var_index < tt.num_vars() ); + assert( var_index < real_num_vars ); assert( tt.num_vars() == care.num_vars() ); - if ( tt.num_vars() <= 6 || var_index < 6 ) + const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); + if ( real_num_vars <= 6 || var_index < 6 ) { auto it_tt = std::begin( tt._bits ); auto it_care = std::begin( care._bits ); - while ( it_tt != std::end( tt._bits ) ) + while ( it_tt != std::begin( tt._bits ) + num_blocks ) { uint64_t new_bits = *it_tt & *it_care; *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); - *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); + *it_care = ( *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index]; + *it_care = *it_care | ( *it_care << ( uint64_t( 1 ) << var_index ) ); ++it_tt; ++it_care; @@ -1256,7 +1270,7 @@ class ac_decomposition_impl } const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < static_cast( tt.num_blocks() ); i += 2 * step ) + for ( auto i = 0u; i < static_cast( num_blocks ); i += 2 * step ) { for ( auto j = 0; j < step; ++j ) { From 1f72ffce79413c617840f9a4ca1a33e4f81d974e Mon Sep 17 00:00:00 2001 From: aletempiac Date: Mon, 25 Mar 2024 14:23:43 +0100 Subject: [PATCH 03/11] Improving ACD performance with bail-out conditions --- src/map/if/acd/ac_decomposition.hpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index 0ad1e34c2..a384cc2e1 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -232,7 +232,7 @@ class ac_decomposition_impl best_cost = multiplicity + additional_cost; best_free_set = i; - if ( !ps.use_first ) + if ( !ps.use_first && multiplicity > 2 ) { continue; } @@ -270,7 +270,7 @@ class ac_decomposition_impl best_cost = multiplicity + additional_cost; best_free_set = i; - if ( !ps.use_first ) + if ( !ps.use_first && multiplicity > 2 ) { continue; } @@ -478,6 +478,13 @@ class ac_decomposition_impl pComb[i] = pInvPerm[i] = i; } + /* early bail-out conditions */ + uint32_t bail_multiplicity = 2; + if ( best_multiplicity < UINT32_MAX ) + { + bail_multiplicity = ( best_multiplicity >> 1 ) + ( best_multiplicity & 1 ); + } + /* enumerate combinations */ do { @@ -490,6 +497,11 @@ class ac_decomposition_impl { bestPerm[i] = pComb[i]; } + + if ( best_cost <= bail_multiplicity ) + { + break; + } } } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) ); From 8f3447800cc93f891cc84bb4bbc88e76bb5d6ef7 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Tue, 2 Apr 2024 11:25:03 +0200 Subject: [PATCH 04/11] Support again decompositions into luts smaller than 6 --- src/map/if/acd/ac_decomposition.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index a384cc2e1..a7321d0d5 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -1369,10 +1369,15 @@ class ac_decomposition_impl std::swap( var_index1, var_index2 ); } - assert( num_vars > 6 ); const uint32_t num_blocks = 1 << ( num_vars - 6 ); - if ( var_index2 <= 5 ) + if ( num_vars <= 6 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift ); + } + else if ( var_index2 <= 5 ) { const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); From 6b5ebb3e76802a01e78c2a36e0327a270884e909 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 10 Apr 2024 18:42:52 +0200 Subject: [PATCH 05/11] Removing assertion when decomposing into LUTs smaller than 6 --- src/map/if/acd/ac_decomposition.hpp | 2 +- src/map/if/acd/ac_wrapper.h | 4 +- src/map/if/acd/acdXX.hpp | 1246 +++++++++++++++++++++++++++ 3 files changed, 1249 insertions(+), 3 deletions(-) create mode 100644 src/map/if/acd/acdXX.hpp diff --git a/src/map/if/acd/ac_decomposition.hpp b/src/map/if/acd/ac_decomposition.hpp index a7321d0d5..d8a44b1d5 100644 --- a/src/map/if/acd/ac_decomposition.hpp +++ b/src/map/if/acd/ac_decomposition.hpp @@ -1369,7 +1369,7 @@ class ac_decomposition_impl std::swap( var_index1, var_index2 ); } - const uint32_t num_blocks = 1 << ( num_vars - 6 ); + const uint32_t num_blocks = num_vars <= 6 ? 1 : 1 << ( num_vars - 6 ); if ( num_vars <= 6 ) { diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index a6e54b0a9..6bde657c0 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -35,8 +35,8 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); -int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); -int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); +int acdXX_evaluate( word * pTruth, unsigned lutSize, unsigned nVars, int compute_decomposition ); +int acdXX_decompose( word * pTruth, unsigned lutSize, unsigned nVars, unsigned char *decomposition ); int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); diff --git a/src/map/if/acd/acdXX.hpp b/src/map/if/acd/acdXX.hpp new file mode 100644 index 000000000..5ee6abcd6 --- /dev/null +++ b/src/map/if/acd/acdXX.hpp @@ -0,0 +1,1246 @@ +/**C++File************************************************************** + + FileName [acd66.hpp] + + SystemName [ABC: Logic synthesis and verification system.] + + PackageName [Ashenhurst-Curtis decomposition.] + + Synopsis [Interface with the FPGA mapping package.] + + Author [Alessandro Tempia Calvino] + + Affiliation [EPFL] + + Date [Ver. 1.0. Started - Apr 10, 2024.] + +***********************************************************************/ +/*! + \file acdXX.hpp + \brief Ashenhurst-Curtis decomposition for "XX" cascade + + \author Alessandro Tempia Calvino +*/ + +#ifndef _ACDXX_H_ +#define _ACDXX_H_ +#pragma once + +#include +#include +#include +#include +#include + +#include "kitty_constants.hpp" +#include "kitty_constructors.hpp" +#include "kitty_dynamic_tt.hpp" +#include "kitty_operations.hpp" +#include "kitty_operators.hpp" +#include "kitty_static_tt.hpp" + +ABC_NAMESPACE_CXX_HEADER_START + +namespace acd +{ + +struct acdXX_params +{ + uint32_t lut_size{ 6 }; + uint32_t max_shared_vars{ 4 }; + bool verify{ false }; +}; + +class acdXX_impl +{ +private: + static constexpr uint32_t max_num_vars = 11; + using STT = kitty::static_truth_table; + using LTT = kitty::static_truth_table<6>; + +public: + explicit acdXX_impl( uint32_t const num_vars, acdXX_params const& ps = {} ) + : num_vars( num_vars ), ps( ps ) + { + std::iota( permutations.begin(), permutations.end(), 0 ); + } + + /*! \brief Runs ACD XX */ + bool run( word* ptt ) + { + assert( num_vars > 4 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars >= 2 * ps.lut_size ) + { + return false; + } + + /* saturate the maximum number of shared variables */ + ps.max_shared_vars = std::min( ps.max_shared_vars, ps.lut_size - 2 ); + + /* convert to static TT */ + init_truth_table( ptt ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition(); + } + + /*! \brief Runs ACD XX */ + int run( word* ptt, unsigned delay_profile ) + { + assert( num_vars > 4 ); + + /* truth table is too large for the settings */ + if ( num_vars > max_num_vars || num_vars >= 2 * ps.lut_size ) + { + return 0; + } + + uint32_t late_arriving = __builtin_popcount( delay_profile ); + + /* too many late arriving variables */ + if ( late_arriving >= ps.lut_size ) + return 0; + + /* saturate the maximum number of shared variables */ + ps.max_shared_vars = std::min( ps.max_shared_vars, ps.lut_size - 2 ); + + /* convert to static TT */ + init_truth_table( ptt ); + best_tt = start_tt; + + /* permute late arriving variables to be the least significant */ + reposition_late_arriving_variables( delay_profile, late_arriving ); + + /* run ACD trying different bound sets and free sets */ + return find_decomposition_offset( late_arriving ) ? ( delay_profile == 0 ? 2 : 1 ) : 0; + } + + int compute_decomposition() + { + if ( best_multiplicity == UINT32_MAX ) + return -1; + + compute_decomposition_impl(); + + if ( ps.verify && !verify_impl() ) + { + return 1; + } + + return 0; + } + + uint32_t get_num_edges() + { + if ( bs_support_size == UINT32_MAX ) + { + return num_vars + 1 + num_shared_vars; + } + + /* real value after support minimization */ + return bs_support_size + best_free_set + 1 + num_shared_vars; + } + + /* contains a 1 for FS variables */ + unsigned get_profile() + { + unsigned profile = 0; + + if ( best_multiplicity == UINT32_MAX ) + return -1; + + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + + return profile; + } + + void get_decomposition( unsigned char* decompArray ) + { + if ( bs_support_size == UINT32_MAX ) + return; + + get_decomposition_abc( decompArray ); + } + +private: + bool find_decomposition() + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* use multiple shared set variables */ + if ( ps.max_shared_vars > 1 ) + { + return find_decomposition_bs_multi_ss( num_vars - ps.lut_size ); + } + + uint32_t max_free_set = num_vars == ( 2 * ps.lut_size - 1 ) ? ps.lut_size - 1 : ps.lut_size - 1 - ps.max_shared_vars; + + /* find ACD "XX" for different number of variables in the free set */ + for ( uint32_t i = num_vars - ps.lut_size; i <= max_free_set; ++i ) + { + if ( find_decomposition_bs( i ) ) + return true; + } + + best_multiplicity = UINT32_MAX; + return false; + } + + bool find_decomposition_offset( uint32_t offset ) + { + best_multiplicity = UINT32_MAX; + best_free_set = UINT32_MAX; + + /* use multiple shared set variables */ + if ( ps.max_shared_vars > 1 ) + { + return find_decomposition_bs_offset_multi_ss( std::max( num_vars - ps.lut_size, offset ), offset ); + } + + uint32_t max_free_set = ( num_vars == ( 2 * ps.lut_size - 1 ) || ( offset == ps.lut_size - 1 ) ) ? ps.lut_size - 1 : ps.lut_size - 1 - ps.max_shared_vars; + + /* find ACD "66" for different number of variables in the free set */ + for ( uint32_t i = std::max( num_vars - 6, offset ); i <= max_free_set; ++i ) + { + if ( find_decomposition_bs_offset( i, offset ) ) + return true; + } + + best_multiplicity = UINT32_MAX; + return false; + } + + void init_truth_table( word* ptt ) + { + uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); + + for ( uint32_t i = 0; i < num_blocks; ++i ) + { + start_tt._bits[i] = ptt[i]; + } + + local_extend_to( start_tt, num_vars ); + } + + uint32_t column_multiplicity( STT const& tt, uint32_t free_set_size ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const limit = ( ps.max_shared_vars == 0 || free_set_size == ( ps.lut_size - 1 ) ) ? 2 : 4; + uint32_t const inner_loop_max = ( num_vars < 6 ? ( 1 << num_vars ) : 64 ) >> free_set_size; + uint32_t cofactors[4]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < inner_loop_max; ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return 5; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size, uint32_t const limit ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const inner_loop_max = ( num_vars < 6 ? ( 1 << num_vars ) : 64 ) >> free_set_size; + uint32_t cofactors[32]; + uint32_t size = 0; + + /* extract iset functions */ + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < inner_loop_max; ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t k; + for ( k = 0; k < size; ++k ) + { + if ( fs_fn == cofactors[k] ) + break; + } + if ( k == limit ) + return 64; + if ( k == size ) + cofactors[size++] = fs_fn; + sub >>= shift; + } + } + + return size; + } + + inline bool combinations_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == num_vars - k + i; --i ) + { + if ( i == offset ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + swap_inplace_local( tt, i, pos_new ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + swap_inplace_local( tt, j, pos_new ); + } + + return true; + } + + inline bool combinations_next_simple( uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, uint32_t size ) + { + uint32_t i; + + for ( i = k - 1; pComb[i] == size - k + i; --i ) + { + if ( i == 0 ) + return false; + } + + /* move vars */ + uint32_t var_old = pComb[i]; + uint32_t pos_new = pInvPerm[var_old + 1]; + std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); + std::swap( pComb[i], pComb[pos_new] ); + + for ( uint32_t j = i + 1; j < k; j++ ) + { + var_old = pComb[j]; + pos_new = pInvPerm[pComb[j - 1] + 1]; + std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); + std::swap( pComb[j], pComb[pos_new] ); + } + + return true; + } + + bool find_decomposition_bs( uint32_t free_set_size ) + { + STT tt = start_tt; + + /* works up to 11 input truth tables */ + assert( num_vars <= 11 ); + + /* init combinations */ + uint32_t pComb[11], pInvPerm[11]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + else if ( ps.max_shared_vars > 0 && cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_offset( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 11 input truth tables */ + assert( num_vars <= 11 ); + best_free_set = free_set_size; + + /* special case */ + if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + else if ( ps.max_shared_vars > 0 && cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[11], pInvPerm[11]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity( tt, free_set_size ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + else if ( ps.max_shared_vars > 0 && cost <= 4 && free_set_size < 5 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set( tt ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move shared variable as the most significative one */ + swap_inplace_local( best_tt, res, num_vars - 1 ); + std::swap( permutations[res], permutations[num_vars - 1] ); + num_shared_vars = 1; + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_multi_ss( uint32_t free_set_size ) + { + STT tt = start_tt; + + /* works up to 11 input truth tables */ + assert( num_vars <= 11 ); + + /* init combinations */ + uint32_t pComb[11], pInvPerm[11], shared_set[4]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + uint32_t limit = std::min( 1 << ( ps.lut_size - free_set_size ), 1 << ( ps.max_shared_vars + 1 ) ); + + /* enumerate combinations */ + best_free_set = free_set_size; + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, limit ); + if ( cost <= 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 4 + : 5; + if ( ss_vars_needed + free_set_size < ps.lut_size ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pComb[i]; + } + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + + return false; + } + + bool find_decomposition_bs_offset_multi_ss( uint32_t free_set_size, uint32_t offset ) + { + STT tt = best_tt; + + /* works up to 11 input truth tables */ + assert( num_vars <= 11 ); + best_free_set = free_set_size; + uint32_t shared_set[4]; + + uint32_t limit = std::min( 1 << ( ps.lut_size - free_set_size ), 1 << ( ps.max_shared_vars + 1 ) ); + + /* special case */ + if ( free_set_size == offset ) + { + uint32_t cost = column_multiplicity2( tt, free_set_size, limit ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + return false; + } + + /* init combinations */ + uint32_t pComb[11], pInvPerm[11]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + /* enumerate combinations */ + do + { + uint32_t cost = column_multiplicity2( tt, free_set_size, limit ); + if ( cost == 2 ) + { + best_tt = tt; + best_multiplicity = cost; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + return true; + } + + uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2 + : cost <= 16 ? 3 + : cost <= 32 ? 4 + : 5; + + if ( ss_vars_needed + free_set_size < 6 ) + { + /* look for a shared variable */ + best_multiplicity = cost; + int res = check_shared_set_multi( tt, ss_vars_needed, shared_set ); + + if ( res >= 0 ) + { + best_tt = tt; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + pInvPerm[i] = permutations[pComb[i]]; + } + for ( uint32_t i = 0; i < num_vars; ++i ) + { + permutations[i] = pInvPerm[i]; + } + /* move shared variables as the most significative ones */ + for ( int32_t i = res - 1; i >= 0; --i ) + { + swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i ); + std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] ); + } + num_shared_vars = res; + return true; + } + } + } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + + return false; + } + + bool check_shared_var( STT const& tt, uint32_t free_set_size, uint32_t shared_var ) + { + assert( free_set_size <= 5 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const inner_loop_max = ( num_vars < 6 ? ( 1 << num_vars ) : 64 ) >> free_set_size; + uint32_t cofactors[2][4]; + uint32_t size[2] = { 0, 0 }; + uint32_t shared_var_shift = shared_var - free_set_size; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < inner_loop_max; ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + inline int check_shared_set( STT const& tt ) + { + /* find one shared set variable */ + for ( uint32_t i = best_free_set; i < num_vars; ++i ) + { + /* check the multiplicity of cofactors */ + if ( check_shared_var( tt, best_free_set, i ) ) + { + return i; + } + } + + return -1; + } + + bool check_shared_var_combined( STT const& tt, uint32_t free_set_size, uint32_t shared_vars[6], uint32_t num_shared_vars ) + { + assert( free_set_size <= 5 ); + assert( num_shared_vars <= 4 ); + + uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << free_set_size; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const inner_loop_max = ( num_vars < 6 ? ( 1 << num_vars ) : 64 ) >> free_set_size; + uint32_t cofactors[16][2]; + uint32_t size[16] = { 0 }; + + /* extract iset functions */ + uint32_t iteration_counter = 0; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t sub = tt._bits[i]; + for ( auto j = 0; j < inner_loop_max; ++j ) + { + uint32_t fs_fn = static_cast( sub & mask ); + uint32_t p = 0; + for ( uint32_t k = 0; k < num_shared_vars; ++k ) + { + p |= ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k; + } + + uint32_t k; + for ( k = 0; k < size[p]; ++k ) + { + if ( fs_fn == cofactors[p][k] ) + break; + } + if ( k == 2 ) + return false; + if ( k == size[p] ) + cofactors[p][size[p]++] = fs_fn; + sub >>= shift; + ++iteration_counter; + } + } + + return true; + } + + inline int check_shared_set_multi( STT const& tt, uint32_t target_num_ss, uint32_t* res_shared ) + { + /* init combinations */ + uint32_t pComb[6], pInvPerm[6]; + uint32_t max_shared_vars = std::min( ps.lut_size - best_free_set - 1, ps.max_shared_vars ); + + /* search for a feasible shared set */ + for ( uint32_t i = target_num_ss; i <= max_shared_vars; ++i ) + { + for ( uint32_t i = 0; i < 6; ++i ) + { + pComb[i] = pInvPerm[i] = i; + } + + do + { + /* check for combined shared set */ + if ( check_shared_var_combined( tt, best_free_set, pComb, i ) ) + { + for ( uint32_t j = 0; j < i; ++j ) + { + res_shared[j] = pComb[j]; + } + /* sort vars */ + std::sort( res_shared, res_shared + i ); + return i; + } + } while ( combinations_next_simple( i, pComb, pInvPerm, num_vars - best_free_set ) ); + } + + return -1; + } + + void compute_decomposition_impl( bool verbose = false ) + { + /* construct isets involved in multiplicity */ + LTT composition; + LTT bs; + LTT bs_dc; + + /* construct isets */ + uint32_t offset = 0; + uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; + uint64_t const shift = UINT64_C( 1 ) << best_free_set; + uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; + uint32_t const num_groups = 1 << num_shared_vars; + uint32_t const next_group = 1 << ( num_vars - best_free_set - num_shared_vars ); + uint32_t const inner_loop_max = ( num_vars < 6 ? ( 1 << num_vars ) : 64 ) >> best_free_set; + + uint64_t fs_fun[32] = { 0 }; + uint64_t dc_mask = ( ( UINT64_C( 1 ) << next_group ) - 1 ); + + uint32_t group_index = 0; + uint32_t set_index = 0; + fs_fun[0] = best_tt._bits[0] & mask; + bool set_dc = true; + for ( auto i = 0u; i < num_blocks; ++i ) + { + uint64_t cof = best_tt._bits[i]; + for ( auto j = 0; j < inner_loop_max; ++j ) + { + uint64_t val = cof & mask; + /* move to next block */ + if ( set_index == next_group ) + { + if ( set_dc ) + { + /* only one cofactor can be found in the group --> encoding can be 0 or 1 */ + fs_fun[group_index + 1] = fs_fun[group_index]; + bs_dc._bits |= dc_mask; + } + /* set don't care */ + set_dc = true; + group_index += 2; + set_index = 0; + fs_fun[group_index] = val; + dc_mask <<= next_group; + } + /* gather encoding */ + if ( val != fs_fun[group_index] ) + { + bs._bits |= UINT64_C( 1 ) << ( j + offset ); + fs_fun[group_index + 1] = val; + set_dc = false; // two cofactors are present + } + cof >>= shift; + ++set_index; + } + offset = ( offset + inner_loop_max ) & 0x3F; + } + + if ( set_dc ) + { + /* only one cofactor can be found in the group --> encoding can be 0 or 1 */ + fs_fun[group_index + 1] = fs_fun[group_index]; + bs_dc._bits |= dc_mask; + } + + /* create composition function */ + for ( uint32_t i = 0; i < 2 * num_groups; ++i ) + { + composition._bits |= fs_fun[i] << ( i * shift ); + } + + /* minimize support BS */ + LTT care; + bs_support_size = 0; + uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; + care._bits = masks[num_vars - best_free_set] & ~bs_dc._bits; + for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) + { + if ( !has_var6( bs, care, i ) ) + { + adjust_truth_table_on_dc( bs, care, i ); + continue; + } + + if ( bs_support_size < i ) + { + kitty::swap_inplace( bs, bs_support_size, i ); + } + + bs_support[bs_support_size] = i; + ++bs_support_size; + } + + /* assign functions */ + dec_funcs[0] = bs._bits; + dec_funcs[1] = composition._bits; + + /* print functions */ + if ( verbose ) + { + LTT f; + f._bits = dec_funcs[0]; + std::cout << "BS function : "; + kitty::print_hex( f ); + std::cout << "\n"; + f._bits = dec_funcs[1]; + std::cout << "Composition function: "; + kitty::print_hex( f ); + std::cout << "\n"; + } + } + + template + void local_extend_to( TT_type& tt, uint32_t real_num_vars ) + { + if ( real_num_vars < 6 ) + { + auto mask = *tt.begin(); + + for ( auto i = real_num_vars; i < num_vars; ++i ) + { + mask |= ( mask << ( 1 << i ) ); + } + + std::fill( tt.begin(), tt.end(), mask ); + } + else + { + uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); + auto it = tt.begin(); + while ( it != tt.end() ) + { + it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); + } + } + } + + inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving ) + { + uint32_t k = 0; + for ( uint32_t i = 0; i < late_arriving; ++i ) + { + while ( ( ( delay_profile >> k ) & 1 ) == 0 ) + ++k; + + if ( permutations[i] == k ) + { + ++k; + continue; + } + + std::swap( permutations[i], permutations[k] ); + swap_inplace_local( best_tt, i, k ); + ++k; + } + } + + void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 ) + { + if ( var_index1 == var_index2 ) + { + return; + } + + if ( var_index1 > var_index2 ) + { + std::swap( var_index1, var_index2 ); + } + + const uint32_t num_blocks = num_vars <= 6 ? 1 : 1 << ( num_vars - 6 ); + + if ( num_vars <= 6 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift ); + } + else if ( var_index2 <= 5 ) + { + const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; + const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); + std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), + [shift, &pmask]( uint64_t word ) { + return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); + } ); + } + else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ + { + const auto step = 1 << ( var_index2 - 6 ); + const auto shift = 1 << var_index1; + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = decltype( step ){ 0 }; i < step; ++i ) + { + const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; + const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; + *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; + *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; + } + it += 2 * step; + } + } + else + { + const auto step1 = 1 << ( var_index1 - 6 ); + const auto step2 = 1 << ( var_index2 - 6 ); + auto it = std::begin( tt._bits ); + while ( it != std::begin( tt._bits ) + num_blocks ) + { + for ( auto i = 0; i < step2; i += 2 * step1 ) + { + for ( auto j = 0; j < step1; ++j ) + { + std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); + } + } + it += 2 * step2; + } + } + } + + inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) + { + if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) + { + return true; + } + + return false; + } + + void adjust_truth_table_on_dc( LTT& tt, LTT& care, uint32_t var_index ) + { + uint64_t new_bits = tt._bits & care._bits; + tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | + ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); + care._bits = ( care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index]; + care._bits = care._bits | ( care._bits << ( uint64_t( 1 ) << var_index ) ); + } + + /* Decomposition format for ABC + * + * The record is an array of unsigned chars where: + * - the first unsigned char entry stores the number of unsigned chars in the record + * - the second entry stores the number of LUTs + * After this, several sub-records follow, each representing one LUT as follows: + * - an unsigned char entry listing the number of fanins + * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function + * have indexes from 0 to N-1, followed by the internal signals in a topological order + * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. + * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) + * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented + */ + void get_decomposition_abc( unsigned char* decompArray ) + { + unsigned char* pArray = decompArray; + unsigned char bytes = 2; + + /* write number of LUTs */ + pArray++; + *pArray = 2; + pArray++; + + /* write BS LUT */ + /* write fanin size */ + *pArray = bs_support_size; + pArray++; + ++bytes; + + /* write support */ + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; + pArray++; + ++bytes; + } + + /* write truth table */ + uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write top LUT */ + /* write fanin size */ + uint32_t support_size = best_free_set + 1 + num_shared_vars; + *pArray = support_size; + pArray++; + ++bytes; + + /* write support */ + for ( uint32_t i = best_free_set; i < best_free_set; ++i ) + { + *pArray = (unsigned char)permutations[i]; + pArray++; + ++bytes; + } + + *pArray = (unsigned char)num_vars; + pArray++; + ++bytes; + + for ( uint32_t i = 0; i < num_shared_vars; ++i ) + { + *pArray = (unsigned char)permutations[num_vars - num_shared_vars + i]; + pArray++; + ++bytes; + } + + /* write truth table */ + tt_num_bytes = ( support_size <= 3 ) ? 1 : ( 1 << ( support_size - 3 ) ); + for ( uint32_t i = 0; i < tt_num_bytes; ++i ) + { + *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); + pArray++; + ++bytes; + } + + /* write numBytes */ + *decompArray = bytes; + } + + bool verify_impl() + { + /* create PIs */ + STT pis[max_num_vars]; + for ( uint32_t i = 0; i < num_vars; ++i ) + { + kitty::create_nth_var( pis[i], permutations[i] ); + } + + /* BS function patterns */ + STT bsi[6]; + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + bsi[i] = pis[best_free_set + bs_support[i]]; + } + + /* compute first function */ + STT bsf_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < bs_support_size; ++j ) + { + pattern |= get_bit( bsi[j], i ) << j; + } + if ( ( dec_funcs[0] >> pattern ) & 1 ) + { + set_bit( bsf_sim, i ); + } + } + + /* compute first function */ + STT top_sim; + for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) + { + uint32_t pattern = 0u; + for ( auto j = 0; j < best_free_set; ++j ) + { + pattern |= get_bit( pis[j], i ) << j; + } + pattern |= get_bit( bsf_sim, i ) << best_free_set; + + /* shared variables */ + for ( auto j = 0u; j < num_shared_vars; ++j ) + { + pattern |= get_bit( pis[num_vars - num_shared_vars + j], i ) << ( best_free_set + 1 + j ); + } + + if ( ( dec_funcs[1] >> pattern ) & 1 ) + { + set_bit( top_sim, i ); + } + } + + /* extend function */ + for ( uint32_t i = 0; i < ( 1 << ( num_vars > 6 ? num_vars - 6 : 1 ) ); ++i ) + { + if ( top_sim._bits[i] != start_tt._bits[i] ) + { + std::cout << "Found incorrect decomposition\n"; + report_tt( top_sim ); + std::cout << " instead_of\n"; + report_tt( start_tt ); + return false; + } + } + + return true; + } + + uint32_t get_bit( const STT& tt, uint64_t index ) + { + return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; + } + + void set_bit( STT& tt, uint64_t index ) + { + tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); + } + + void report_tt( STT const& stt ) + { + kitty::dynamic_truth_table tt( num_vars ); + + std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars > 6 ? num_vars - 6 : 0 ) ), std::begin( tt ) ); + kitty::print_hex( tt ); + std::cout << "\n"; + } + +private: + uint32_t best_multiplicity{ UINT32_MAX }; + uint32_t best_free_set{ UINT32_MAX }; + uint32_t best_multiplicity0{ UINT32_MAX }; + uint32_t best_multiplicity1{ UINT32_MAX }; + uint32_t bs_support_size{ UINT32_MAX }; + uint32_t num_shared_vars{ 0 }; + STT best_tt; + STT start_tt; + uint64_t dec_funcs[2]; + uint32_t bs_support[6]; + + uint32_t const num_vars; + acdXX_params ps; + std::array permutations; +}; + +} // namespace acd + +ABC_NAMESPACE_CXX_HEADER_END + +#endif // _ACDXX_H_ \ No newline at end of file From 64fea5c4c246ba7bc05f58c4ab26f9fab808f7ed Mon Sep 17 00:00:00 2001 From: aletempiac Date: Wed, 10 Apr 2024 18:43:52 +0200 Subject: [PATCH 06/11] Improving the performance and quality of acd66 --- src/map/if/acd/acd66.hpp | 260 ++++----------------------------------- 1 file changed, 25 insertions(+), 235 deletions(-) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index aea9752e6..e9dec8a3e 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -732,7 +732,7 @@ class acd66_impl uint32_t p = 0; for ( uint32_t k = 0; k < num_shared_vars; ++k ) { - p += ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k; + p |= ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k; } uint32_t k; @@ -787,97 +787,10 @@ class acd66_impl void compute_decomposition_impl( bool verbose = false ) { - if ( num_shared_vars > 1 ) - return compute_decomposition_impl_multi_ss( verbose ); - - bool has_shared_set = num_shared_vars > 0; - - /* construct isets involved in multiplicity */ - LTT isets0[2]; - LTT isets1[2]; - - /* construct isets */ - uint32_t offset = 0; - uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1; - uint64_t const shift = UINT64_C( 1 ) << best_free_set; - uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; - - /* limit analysis on 0 cofactor of the shared variable */ - if ( has_shared_set ) - num_blocks >>= 1; - - uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; - - for ( auto i = 0u; i < num_blocks; ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - if ( val == fs_fun[0] ) - { - isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[1] = val; - } - cof >>= shift; - } - offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; - } - - /* continue on the 1 cofactor if shared set */ - if ( has_shared_set ) - { - fs_fun[2] = best_tt._bits[num_blocks] & mask; - for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - if ( val == fs_fun[2] ) - { - isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[3] = val; - } - cof >>= shift; - } - offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; - } - } - - /* find the support minimizing combination with shared set */ - compute_functions( isets0, isets1, fs_fun ); - - /* print functions */ - if ( verbose ) - { - LTT f; - f._bits = dec_funcs[0]; - std::cout << "BS function : "; - kitty::print_hex( f ); - std::cout << "\n"; - f._bits = dec_funcs[1]; - std::cout << "Composition function: "; - kitty::print_hex( f ); - std::cout << "\n"; - } - } - - void compute_decomposition_impl_multi_ss( bool verbose = false ) - { - /* due to the high multiplicity value this method does not perform support minimization */ - /* construct isets involved in multiplicity */ LTT composition; LTT bs; + LTT bs_dc; /* construct isets */ uint32_t offset = 0; @@ -888,10 +801,12 @@ class acd66_impl uint32_t const next_group = 1 << ( num_vars - best_free_set - num_shared_vars ); uint64_t fs_fun[32] = { 0 }; + uint64_t dc_mask = ( ( UINT64_C( 1 ) << next_group ) - 1 ); uint32_t group_index = 0; uint32_t set_index = 0; fs_fun[0] = best_tt._bits[0] & mask; + bool set_dc = true; for ( auto i = 0u; i < num_blocks; ++i ) { uint64_t cof = best_tt._bits[i]; @@ -901,15 +816,25 @@ class acd66_impl /* move to next block */ if ( set_index == next_group ) { + if ( set_dc ) + { + /* only one cofactor can be found in the group --> encoding can be 0 or 1 */ + fs_fun[group_index + 1] = fs_fun[group_index]; + bs_dc._bits |= dc_mask; + } + /* set don't care */ + set_dc = true; group_index += 2; set_index = 0; fs_fun[group_index] = val; + dc_mask <<= next_group; } /* gather encoding */ if ( val != fs_fun[group_index] ) { bs._bits |= UINT64_C( 1 ) << ( j + offset ); fs_fun[group_index + 1] = val; + set_dc = false; // two cofactors are present } cof >>= shift; ++set_index; @@ -917,6 +842,13 @@ class acd66_impl offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; } + if ( set_dc ) + { + /* only one cofactor can be found in the group --> encoding can be 0 or 1 */ + fs_fun[group_index + 1] = fs_fun[group_index]; + bs_dc._bits |= dc_mask; + } + /* create composition function */ for ( uint32_t i = 0; i < 2 * num_groups; ++i ) { @@ -927,11 +859,12 @@ class acd66_impl LTT care; bs_support_size = 0; uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; - care._bits = masks[num_vars - best_free_set]; + care._bits = masks[num_vars - best_free_set] & ~bs_dc._bits; for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) { if ( !has_var6( bs, care, i ) ) { + adjust_truth_table_on_dc( bs, care, i ); continue; } @@ -963,150 +896,6 @@ class acd66_impl } } - inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) - { - /* u = 2 no support minimization */ - if ( best_multiplicity < 3 ) - { - dec_funcs[0] = isets0[0]._bits; - bs_support_size = num_vars - best_free_set; - for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) - { - bs_support[i] = i; - } - compute_composition( fs_fun ); - return; - } - - /* u = 4 two possibilities */ - if ( best_multiplicity == 4 ) - { - compute_functions4( isets0, isets1, fs_fun ); - return; - } - - /* u = 3 if both sets have multiplicity 2 there are no don't cares */ - if ( best_multiplicity0 == best_multiplicity1 ) - { - compute_functions4( isets0, isets1, fs_fun ); - return; - } - - /* u = 3 one set has multiplicity 1, use don't cares */ - compute_functions3( isets0, isets1, fs_fun ); - } - - inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) - { - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; - LTT f = isets0[0] | isets1[1]; - LTT care; - care._bits = masks[num_vars - best_free_set]; - - /* count the number of support variables */ - uint32_t support_vars1 = 0; - for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) - { - support_vars1 += has_var6( f, care, i ) ? 1 : 0; - bs_support[i] = i; - } - - /* use a different set */ - f = isets0[0] | isets1[0]; - - uint32_t support_vars2 = 0; - for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) - { - support_vars2 += has_var6( f, care, i ) ? 1 : 0; - } - - bs_support_size = support_vars2; - if ( support_vars2 > support_vars1 ) - { - f = isets0[0] | isets1[1]; - std::swap( fs_fun[2], fs_fun[3] ); - bs_support_size = support_vars1; - } - - /* move variables */ - if ( bs_support_size < num_vars - best_free_set ) - { - support_vars1 = 0; - for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) - { - if ( !has_var6( f, care, i ) ) - { - continue; - } - - if ( support_vars1 < i ) - { - kitty::swap_inplace( f, support_vars1, i ); - } - - bs_support[support_vars1] = i; - ++support_vars1; - } - } - - dec_funcs[0] = f._bits; - compute_composition( fs_fun ); - } - - inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4] ) - { - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; - LTT f = isets0[0] | isets1[0]; - LTT care; - - /* init the care set */ - if ( best_multiplicity0 == 1 ) - { - care._bits = masks[num_vars - best_free_set] & ( ~isets0[0]._bits ); - fs_fun[1] = fs_fun[0]; - } - else - { - care._bits = masks[num_vars - best_free_set] & ( ~isets1[0]._bits ); - fs_fun[3] = fs_fun[2]; - } - - /* count the number of support variables */ - uint32_t support_vars = 0; - for ( uint32_t i = 0; i < num_vars - best_free_set; ++i ) - { - if ( !has_var6( f, care, i ) ) - { - adjust_truth_table_on_dc( f, care, i ); - continue; - } - - if ( support_vars < i ) - { - kitty::swap_inplace( f, support_vars, i ); - } - - bs_support[support_vars] = i; - ++support_vars; - } - - bs_support_size = support_vars; - dec_funcs[0] = f._bits; - compute_composition( fs_fun ); - } - - void compute_composition( uint64_t fs_fun[4] ) - { - dec_funcs[1] = fs_fun[0] << ( 1 << best_free_set ); - dec_funcs[1] |= fs_fun[1]; - - if ( best_multiplicity > 2 ) - { - dec_funcs[1] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); - dec_funcs[1] |= fs_fun[3] << ( 2 << best_free_set ); - } - } - template void local_extend_to( TT_type& tt, uint32_t real_num_vars ) { @@ -1227,7 +1016,8 @@ class acd66_impl uint64_t new_bits = tt._bits & care._bits; tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); - care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); + care._bits = ( care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index]; + care._bits = care._bits | ( care._bits << ( uint64_t( 1 ) << var_index ) ); } /* Decomposition format for ABC From 32bc1d4ab245a03820de8fdee82c5621fa08e14c Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 11 Apr 2024 11:31:28 +0200 Subject: [PATCH 07/11] Cleaning and generalizing code --- src/base/abci/abc.c | 4 +-- src/map/if/acd/ac_wrapper.cpp | 40 ++++++++--------------- src/map/if/acd/ac_wrapper.h | 4 +-- src/map/if/acd/acd66.hpp | 6 ++-- src/map/if/acd/acdXX.hpp | 17 ++++------ src/map/if/if.h | 2 +- src/map/if/ifDec66.c | 61 +++++++++-------------------------- 7 files changed, 42 insertions(+), 92 deletions(-) diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index b9535f423..36c8f698b 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19700,7 +19700,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) globalUtilOptind++; if ( strlen(pPars->pLutStruct) != 2 && strlen(pPars->pLutStruct) != 3 ) { - Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2- or 3-char string (e.g. \"66\" or \"666\").\n" ); + Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2-char string (e.g. \"44\" or \"66\" \").\n" ); goto usage; } break; @@ -19883,7 +19883,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) } if ( pPars->fEnableStructN ) { - pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck66; + pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheckXX; } else { diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 6ee265318..7914bbd42 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -19,7 +19,7 @@ #include "ac_wrapper.h" #include "ac_decomposition.hpp" #include "acd66.hpp" -#include "acd666.hpp" +#include "acdXX.hpp" ABC_NAMESPACE_IMPL_START @@ -113,61 +113,49 @@ int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, } } -int acd66_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) +inline int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ) { using namespace acd; - acd66_impl acd( nVars, true, false ); if ( acd.run( pTruth ) == 0 ) return 0; - - if ( !compute_decomposition ) + if ( decomposition == NULL ) return 1; - int val = acd.compute_decomposition(); if ( val != 0 ) { return 0; } + acd.get_decomposition( decomposition ); return 1; } -int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ) +int acdXX_decompose( word * pTruth, unsigned lutSize, unsigned nVars, unsigned char *decomposition ) { using namespace acd; - acd66_impl acd( nVars, true, false ); - acd.run( pTruth ); - - int val = acd.compute_decomposition(); - if ( val != 0 ) + if ( lutSize == 6 ) { - return -1; + return acd66_decompose( pTruth, nVars, decomposition ); } - - acd.get_decomposition( decomposition ); - return 0; -} - -int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ) -{ - using namespace acd; - - acd666_impl acd( nVars, false ); + + acdXX_params ps; + ps.lut_size = lutSize; + ps.max_shared_vars = lutSize - 2; + acdXX_impl acd( nVars, ps ); if ( acd.run( pTruth ) == 0 ) return 0; - - if ( !compute_decomposition ) + if ( decomposition == NULL ) return 1; - int val = acd.compute_decomposition(); if ( val != 0 ) { return 0; } + acd.get_decomposition( decomposition ); return 1; } diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index 6bde657c0..a4c4bff9b 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -35,11 +35,9 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); -int acdXX_evaluate( word * pTruth, unsigned lutSize, unsigned nVars, int compute_decomposition ); +int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); int acdXX_decompose( word * pTruth, unsigned lutSize, unsigned nVars, unsigned char *decomposition ); -int acd666_evaluate( word * pTruth, unsigned nVars, int compute_decomposition ); - ABC_NAMESPACE_HEADER_END #endif \ No newline at end of file diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index e9dec8a3e..500dab361 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -215,7 +215,7 @@ class acd66_impl local_extend_to( start_tt, num_vars ); } - uint32_t column_multiplicity( STT const& tt, uint32_t free_set_size ) + uint32_t column_multiplicity( STT const& tt, uint32_t const free_set_size ) { assert( free_set_size <= 5 ); @@ -250,7 +250,7 @@ class acd66_impl return size; } - uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size, uint32_t const limit ) + uint32_t column_multiplicity2( STT const& tt, uint32_t const free_set_size, uint32_t const limit ) { assert( free_set_size <= 5 ); @@ -1198,8 +1198,6 @@ class acd66_impl private: uint32_t best_multiplicity{ UINT32_MAX }; uint32_t best_free_set{ UINT32_MAX }; - uint32_t best_multiplicity0{ UINT32_MAX }; - uint32_t best_multiplicity1{ UINT32_MAX }; uint32_t bs_support_size{ UINT32_MAX }; uint32_t num_shared_vars{ 0 }; STT best_tt; diff --git a/src/map/if/acd/acdXX.hpp b/src/map/if/acd/acdXX.hpp index 5ee6abcd6..74d5df8d9 100644 --- a/src/map/if/acd/acdXX.hpp +++ b/src/map/if/acd/acdXX.hpp @@ -46,8 +46,13 @@ namespace acd struct acdXX_params { + /* Nymber of inputs of the LUT */ uint32_t lut_size{ 6 }; + + /* Maximum number of variables in the shared set */ uint32_t max_shared_vars{ 4 }; + + /* Run verification */ bool verify{ false }; }; @@ -228,7 +233,7 @@ class acdXX_impl local_extend_to( start_tt, num_vars ); } - uint32_t column_multiplicity( STT const& tt, uint32_t free_set_size ) + uint32_t column_multiplicity( STT const& tt, uint32_t const free_set_size ) { assert( free_set_size <= 5 ); @@ -264,7 +269,7 @@ class acdXX_impl return size; } - uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size, uint32_t const limit ) + uint32_t column_multiplicity2( STT const& tt, uint32_t const free_set_size, uint32_t const limit ) { assert( free_set_size <= 5 ); @@ -412,9 +417,6 @@ class acdXX_impl bool find_decomposition_bs_offset( uint32_t free_set_size, uint32_t offset ) { STT tt = best_tt; - - /* works up to 11 input truth tables */ - assert( num_vars <= 11 ); best_free_set = free_set_size; /* special case */ @@ -504,9 +506,6 @@ class acdXX_impl { STT tt = start_tt; - /* works up to 11 input truth tables */ - assert( num_vars <= 11 ); - /* init combinations */ uint32_t pComb[11], pInvPerm[11], shared_set[4]; for ( uint32_t i = 0; i < num_vars; ++i ) @@ -1225,8 +1224,6 @@ class acdXX_impl private: uint32_t best_multiplicity{ UINT32_MAX }; uint32_t best_free_set{ UINT32_MAX }; - uint32_t best_multiplicity0{ UINT32_MAX }; - uint32_t best_multiplicity1{ UINT32_MAX }; uint32_t bs_support_size{ UINT32_MAX }; uint32_t num_shared_vars{ 0 }; STT best_tt; diff --git a/src/map/if/if.h b/src/map/if/if.h index 33621ac92..47fe109ca 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -552,7 +552,7 @@ extern int If_CutPerformCheck07( If_Man_t * p, unsigned * pTruth, in extern int If_CutPerformCheck08( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck10( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck16( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); -extern int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); +extern int If_CutPerformCheckXX( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck45( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck54( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); extern int If_CutPerformCheck75( If_Man_t * p, unsigned * pTruth, int nVars, int nLeaves, char * pStr ); diff --git a/src/map/if/ifDec66.c b/src/map/if/ifDec66.c index 5c2dce53b..9c486685c 100644 --- a/src/map/if/ifDec66.c +++ b/src/map/if/ifDec66.c @@ -24,7 +24,7 @@ ABC_NAMESPACE_IMPL_START -#define CLU_VAR_MAX 16 +#define CLU_VAR_MAX 11 #define CLU_MEM_MAX 1000 // 1 GB #define CLU_UNUSED 0xff @@ -252,7 +252,7 @@ unsigned * If_CluHashLookup2( If_Man_t * p, word * pTruth, int t ) } // returns if successful -int If_CluCheck66( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) +int If_CluCheckXX( If_Man_t * p, word * pTruth0, int lutSize, int nVars, int fHashing ) { If_Grp_t G1 = {0}; unsigned * pHashed = NULL; @@ -267,32 +267,7 @@ int If_CluCheck66( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) /* new entry */ if ( G1.nVars == 0 ) { - G1.nVars = acd66_evaluate( pTruth0, nVars, 0 ); - } - - if ( pHashed ) - *pHashed = If_CluGrp2Uns2( &G1 ); - - return G1.nVars; -} - -// returns if successful -int If_CluCheck666( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) -{ - If_Grp_t G1 = {0}; - unsigned * pHashed = NULL; - - if ( p && fHashing ) - { - pHashed = If_CluHashLookup2( p, pTruth0, 0 ); - if ( pHashed && *pHashed != CLU_UNUSED ) - If_CluUns2Grp2( *pHashed, &G1 ); - } - - /* new entry */ - if ( G1.nVars == 0 ) - { - G1.nVars = acd666_evaluate( pTruth0, nVars, 0 ); + G1.nVars = acdXX_decompose( pTruth0, lutSize, nVars, NULL ); } if ( pHashed ) @@ -312,12 +287,11 @@ int If_CluCheck666( If_Man_t * p, word * pTruth0, int nVars, int fHashing ) SeeAlso [] ***********************************************************************/ -int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeaves, char * pStr ) +int If_CutPerformCheckXX( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeaves, char * pStr ) { unsigned pTruth[IF_MAX_FUNC_LUTSIZE > 5 ? 1 << (IF_MAX_FUNC_LUTSIZE - 5) : 1]; - int i, Length; + int Length; // stretch the truth table - assert( nVars >= 6 ); memcpy( pTruth, pTruth0, sizeof(word) * Abc_TtWordNum(nVars) ); Abc_TtStretch6( (word *)pTruth, nLeaves, p->pPars->nLutSize ); @@ -327,35 +301,30 @@ int If_CutPerformCheck66( If_Man_t * p, unsigned * pTruth0, int nVars, int nLeav // quit if parameters are wrong Length = strlen(pStr); - if ( Length != 2 && Length != 3 ) + if ( Length != 2 ) { printf( "Wrong LUT struct (%s)\n", pStr ); return 0; } - for ( i = 0; i < Length; i++ ) + + int lutSize = pStr[0] - '0'; + if ( lutSize < 3 || lutSize > 6 ) { - if ( pStr[i] != '6' ) - { - printf( "The LUT size (%d) should belong to {6}.\n", pStr[i] - '0' ); - return 0; - } + printf( "The LUT size (%d) should belong to {3,4,5,6}.\n", lutSize ); + return 0; } - if ( ( Length == 2 && nLeaves > 11 ) || ( Length == 3 && nLeaves > 16 ) ) + if ( nLeaves >= 2 * lutSize ) { printf( "The cut size (%d) is too large for the LUT structure %s.\n", nLeaves, pStr ); return 0; } - // consider easy case - if ( nLeaves <= 6 ) + // consider trivial case + if ( nLeaves <= lutSize ) return 1; - // derive the decomposition - if ( Length == 2 ) - return If_CluCheck66(p, (word*)pTruth, nVars, 1); - else - return If_CluCheck666(p, (word*)pTruth, nVars, 1); + return If_CluCheckXX(p, (word*)pTruth, lutSize, nVars, 1); } //////////////////////////////////////////////////////////////////////// From 5b49724fcc209124b3f26e6e7807d1356b89ce21 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 11 Apr 2024 15:43:22 +0200 Subject: [PATCH 08/11] removing acd666 --- src/map/if/acd/acd666.hpp | 1257 ------------------------------------- 1 file changed, 1257 deletions(-) delete mode 100644 src/map/if/acd/acd666.hpp diff --git a/src/map/if/acd/acd666.hpp b/src/map/if/acd/acd666.hpp deleted file mode 100644 index 1086af0a3..000000000 --- a/src/map/if/acd/acd666.hpp +++ /dev/null @@ -1,1257 +0,0 @@ -/**C++File************************************************************** - - FileName [acd666.hpp] - - SystemName [ABC: Logic synthesis and verification system.] - - PackageName [Ashenhurst-Curtis decomposition.] - - Synopsis [Interface with the FPGA mapping package.] - - Author [Alessandro Tempia Calvino] - - Affiliation [EPFL] - - Date [Ver. 1.0. Started - Feb 8, 2024.] - -***********************************************************************/ -/*! - \file acd666.hpp - \brief Ashenhurst-Curtis decomposition for "666" cascade - - \author Alessandro Tempia Calvino -*/ - -#ifndef _ACD666_H_ -#define _ACD666_H_ -#pragma once - -#include -#include -#include -#include -#include - -#include "kitty_constants.hpp" -#include "kitty_constructors.hpp" -#include "kitty_dynamic_tt.hpp" -#include "kitty_operations.hpp" -#include "kitty_operators.hpp" -#include "kitty_static_tt.hpp" - -ABC_NAMESPACE_CXX_HEADER_START - -namespace acd -{ - -class acd666_impl -{ -private: - static constexpr uint32_t max_num_vars = 16; - using STT = kitty::static_truth_table; - using LTT = kitty::static_truth_table<6>; - -public: - explicit acd666_impl( uint32_t const num_vars, bool const verify = false ) - : num_vars( num_vars ), verify( verify ) - { - std::iota( permutations.begin(), permutations.end(), 0 ); - } - - /*! \brief Runs ACD 666 */ - bool run( word* ptt ) - { - assert( num_vars > 6 ); - - /* truth table is too large for the settings */ - if ( num_vars > max_num_vars || num_vars > 16 ) - { - return false; - } - - /* convert to static TT */ - init_truth_table( ptt ); - - /* run ACD trying different bound sets and free sets */ - return find_decomposition(); - } - - int compute_decomposition() - { - if ( best_multiplicity == UINT32_MAX ) - return -1; - - uint32_t n = num_luts == 3 ? rm_support_size : num_vars; - compute_decomposition_impl( n ); - - if ( verify && !verify_impl() ) - { - return 1; - } - - return 0; - } - - uint32_t get_num_edges() - { - if ( support_sizes[0] == UINT32_MAX ) - { - return UINT32_MAX; - } - - uint32_t num_edges = support_sizes[0] + support_sizes[1] + 1 + ( shared_vars[0] < UINT32_MAX ? 1 : 0 ); - - if ( num_luts == 2 ) - return num_edges; - - /* real value after support minimization */ - return num_edges + support_sizes[2] + 1 + ( shared_vars[1] < UINT32_MAX ? 1 : 0 ); - } - - /* contains a 1 for BS variables */ - // unsigned get_profile() - // { - // unsigned profile = 0; - - // if ( support_sizes[0] == UINT32_MAX ) - // return -1; - - // for ( uint32_t i = 0; i < bs_support_size; ++i ) - // { - // profile |= 1 << permutations[best_free_set + bs_support[i]]; - // } - - // return profile; - // } - - // void get_decomposition( unsigned char* decompArray ) - // { - // if ( support_sizes[0] == UINT32_MAX ) - // return; - - // get_decomposition_abc( decompArray ); - // } - -private: - bool find_decomposition() - { - best_multiplicity = UINT32_MAX; - best_free_set = UINT32_MAX; - - /* find ACD "66" for different number of variables in the free set */ - for ( uint32_t i = num_vars - 6; i <= 5; ++i ) - { - if ( find_decomposition_bs( start_tt, num_vars, i ) ) - { - num_luts = 2; - return true; - } - } - - /* find ACD "666" for different number of variables in the free set */ - bool dec_found = false; - uint32_t min_vars_free_set = num_vars <= 11 ? 1 : num_vars - 11; - uint32_t max_vars_free_set = num_vars <= 11 ? num_vars - 7 : 5; - for ( uint32_t i = max_vars_free_set; i >= min_vars_free_set; --i ) - // for ( uint32_t i = min_vars_free_set; i <= max_vars_free_set; ++i ) - { - dec_found = find_decomposition_bs( start_tt, num_vars, i ); - if ( dec_found ) - break; - } - - if ( !dec_found ) - { - best_multiplicity = UINT32_MAX; - return false; - } - - /* compute functions for the top and reminder LUT */ - compute_decomposition_impl_top( num_vars ); - - /* find ACD "66" for the remainder function */ - for ( uint32_t i = rm_support_size - 6; i <= 5; ++i ) - { - if ( find_decomposition_bs( remainder, rm_support_size, i ) ) - { - num_luts = 3; - fix_permutations_remainder( rm_support_size ); - return true; - } - } - - best_multiplicity = UINT32_MAX; - return false; - } - - void init_truth_table( word* ptt ) - { - uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) ); - - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - start_tt._bits[i] = ptt[i]; - } - - local_extend_to( start_tt, num_vars ); - } - - uint32_t column_multiplicity( STT const& tt, uint32_t n, uint32_t free_set_size ) - { - assert( free_set_size <= 5 ); - - uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; - uint64_t const shift = UINT64_C( 1 ) << free_set_size; - uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; - uint32_t const limit = free_set_size < 5 ? 4 : 2; - uint32_t cofactors[4]; - uint32_t size = 0; - - /* extract iset functions */ - for ( auto i = 0u; i < num_blocks; ++i ) - { - uint64_t sub = tt._bits[i]; - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) - { - uint32_t fs_fn = static_cast( sub & mask ); - uint32_t k; - for ( k = 0; k < size; ++k ) - { - if ( fs_fn == cofactors[k] ) - break; - } - if ( k == limit ) - return 5; - if ( k == size ) - cofactors[size++] = fs_fn; - sub >>= shift; - } - } - - return size; - } - - inline bool combinations_next( uint32_t n, uint32_t k, uint32_t* pComb, uint32_t* pInvPerm, STT& tt ) - { - uint32_t i; - - for ( i = k - 1; pComb[i] == n - k + i; --i ) - { - if ( i == 0 ) - return false; - } - - /* move vars */ - uint32_t var_old = pComb[i]; - uint32_t pos_new = pInvPerm[var_old + 1]; - std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] ); - std::swap( pComb[i], pComb[pos_new] ); - swap_inplace_local( tt, n, i, pos_new ); - - for ( uint32_t j = i + 1; j < k; j++ ) - { - var_old = pComb[j]; - pos_new = pInvPerm[pComb[j - 1] + 1]; - std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] ); - std::swap( pComb[j], pComb[pos_new] ); - swap_inplace_local( tt, n, j, pos_new ); - } - - return true; - } - - bool find_decomposition_bs( STT tt, uint32_t n, uint32_t free_set_size ) - { - /* works up to 16 input truth tables */ - assert( n <= 16 ); - - /* init combinations */ - uint32_t pComb[16], pInvPerm[16]; - for ( uint32_t i = 0; i < n; ++i ) - { - pComb[i] = pInvPerm[i] = i; - } - - /* enumerate combinations */ - best_free_set = free_set_size; - do - { - uint32_t cost = column_multiplicity( tt, n, free_set_size ); - if ( cost == 2 ) - { - best_tt = tt; - best_multiplicity = cost; - for ( uint32_t i = 0; i < n; ++i ) - { - permutations[i] = pComb[i]; - } - return true; - } - else if ( cost <= 4 && free_set_size < 5 ) - { - /* look for a shared variable */ - best_multiplicity = cost; - int res = check_shared_set( tt, n ); - - if ( res > 0 ) - { - best_tt = tt; - for ( uint32_t i = 0; i < n; ++i ) - { - permutations[i] = pComb[i]; - } - /* move shared variable as the most significative one */ - swap_inplace_local( best_tt, n, res, n - 1 ); - std::swap( permutations[res], permutations[n - 1] ); - return true; - } - } - } while ( combinations_next( n, free_set_size, pComb, pInvPerm, tt ) ); - - return false; - } - - inline bool check_shared_var( STT const& tt, uint32_t n, uint32_t free_set_size, uint32_t shared_var ) - { - assert( free_set_size <= 5 ); - - uint32_t const num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; - uint64_t const shift = UINT64_C( 1 ) << free_set_size; - uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; - uint32_t cofactors[2][4]; - uint32_t size[2] = { 0, 0 }; - uint32_t shared_var_shift = shared_var - free_set_size; - - /* extract iset functions */ - uint32_t iteration_counter = 0; - for ( auto i = 0u; i < num_blocks; ++i ) - { - uint64_t sub = tt._bits[i]; - for ( auto j = 0; j < ( 64 >> free_set_size ); ++j ) - { - uint32_t fs_fn = static_cast( sub & mask ); - uint32_t p = ( iteration_counter >> shared_var_shift ) & 1; - uint32_t k; - for ( k = 0; k < size[p]; ++k ) - { - if ( fs_fn == cofactors[p][k] ) - break; - } - if ( k == 2 ) - return false; - if ( k == size[p] ) - cofactors[p][size[p]++] = fs_fn; - sub >>= shift; - ++iteration_counter; - } - } - - return true; - } - - inline int check_shared_set( STT const& tt, uint32_t n ) - { - /* find one shared set variable */ - for ( uint32_t i = best_free_set; i < n; ++i ) - { - /* check the multiplicity of cofactors */ - if ( check_shared_var( tt, n, best_free_set, i ) ) - { - return i; - } - } - - return -1; - } - - void compute_decomposition_impl_top( uint32_t n, bool verbose = false ) - { - bool has_shared_set = best_multiplicity > 2; - - /* construct isets involved in multiplicity */ - STT isets0[2]; - STT isets1[2]; - - /* construct isets */ - uint32_t offset = 0; - uint32_t num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; - uint64_t const shift = UINT64_C( 1 ) << best_free_set; - uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; - - /* limit analysis on 0 cofactor of the shared variable */ - if ( has_shared_set ) - num_blocks >>= 1; - - uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; - - for ( auto i = 0u; i < num_blocks; ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - if ( val == fs_fun[0] ) - { - isets0[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets0[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[1] = val; - } - cof >>= shift; - } - offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; - } - - /* continue on the 1 cofactor if shared set */ - if ( has_shared_set ) - { - fs_fun[2] = best_tt._bits[num_blocks] & mask; - for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - if ( val == fs_fun[2] ) - { - isets1[0]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets1[1]._bits[i / shift] |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[3] = val; - } - cof >>= shift; - } - offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F; - } - } - - /* find the support minimizing combination with shared set */ - compute_functions_top( isets0, isets1, fs_fun, n ); - - /* print functions */ - if ( verbose ) - { - std::cout << "RM function : "; - kitty::print_hex( remainder ); - std::cout << "\n"; - LTT f; - f._bits = dec_funcs[2]; - std::cout << "Composition function: "; - kitty::print_hex( f ); - std::cout << "\n"; - } - } - - void compute_decomposition_impl( uint32_t n, bool verbose = false ) - { - bool has_shared_set = best_multiplicity > 2; - - /* construct isets involved in multiplicity */ - LTT isets0[2]; - LTT isets1[2]; - - /* construct isets */ - uint32_t offset = 0; - uint32_t num_blocks = ( n > 6 ) ? ( 1u << ( n - 6 ) ) : 1; - uint64_t const shift = UINT64_C( 1 ) << best_free_set; - uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1; - - /* limit analysis on 0 cofactor of the shared variable */ - if ( has_shared_set ) - num_blocks >>= 1; - - uint64_t fs_fun[4] = { best_tt._bits[0] & mask, 0, 0, 0 }; - - for ( auto i = 0u; i < num_blocks; ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - - if ( val == fs_fun[0] ) - { - isets0[0]._bits |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets0[1]._bits |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[1] = val; - } - - cof >>= shift; - } - - offset = ( offset + ( 64 >> best_free_set ) ) % 64; - } - - /* continue on the 1 cofactor if shared set */ - if ( has_shared_set ) - { - fs_fun[2] = best_tt._bits[num_blocks] & mask; - for ( auto i = num_blocks; i < ( num_blocks << 1 ); ++i ) - { - uint64_t cof = best_tt._bits[i]; - for ( auto j = 0; j < ( 64 >> best_free_set ); ++j ) - { - uint64_t val = cof & mask; - - if ( val == fs_fun[2] ) - { - isets1[0]._bits |= UINT64_C( 1 ) << ( j + offset ); - } - else - { - isets1[1]._bits |= UINT64_C( 1 ) << ( j + offset ); - fs_fun[3] = val; - } - - cof >>= shift; - } - - offset = ( offset + ( 64 >> best_free_set ) ) % 64; - } - } - - /* find the support minimizing combination with shared set */ - compute_functions( isets0, isets1, fs_fun, n ); - - /* print functions */ - if ( verbose ) - { - LTT f; - f._bits = dec_funcs[0]; - std::cout << "BS function : "; - kitty::print_hex( f ); - std::cout << "\n"; - f._bits = dec_funcs[1]; - std::cout << "Composition function: "; - kitty::print_hex( f ); - std::cout << "\n"; - } - } - - inline void compute_functions_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - /* u = 2 no support minimization */ - if ( best_multiplicity < 3 ) - { - shared_vars[1] = UINT32_MAX; - remainder = isets0[0]; - rm_support_size = n - best_free_set; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - rm_support[i] = permutations[i + best_free_set]; - } - compute_composition( fs_fun, 2 ); - return; - } - - shared_vars[1] = permutations[n - 1]; - - /* u = 4 two possibilities */ - if ( best_multiplicity == 4 ) - { - compute_functions4_top( isets0, isets1, fs_fun, n ); - return; - } - - /* u = 3 if both sets have multiplicity 2 there are no don't cares */ - if ( best_multiplicity0 == best_multiplicity1 ) - { - compute_functions4_top( isets0, isets1, fs_fun, n ); - return; - } - - /* u = 3 one set has multiplicity 1, use don't cares */ - compute_functions3_top( isets0, isets1, fs_fun, n ); - } - - inline void compute_functions( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - /* u = 2 no support minimization */ - if ( best_multiplicity < 3 ) - { - shared_vars[0] = UINT32_MAX; - dec_funcs[0] = isets0[0]._bits; - support_sizes[0] = n - best_free_set; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - supports[0][i] = permutations[i + best_free_set]; - } - compute_composition( fs_fun, 1 ); - return; - } - - shared_vars[0] = permutations[n - 1]; - - /* u = 4 two possibilities */ - if ( best_multiplicity == 4 ) - { - compute_functions4( isets0, isets1, fs_fun, n ); - return; - } - - /* u = 3 if both sets have multiplicity 2 there are no don't cares */ - if ( best_multiplicity0 == best_multiplicity1 ) - { - compute_functions4( isets0, isets1, fs_fun, n ); - return; - } - - /* u = 3 one set has multiplicity 1, use don't cares */ - compute_functions3( isets0, isets1, fs_fun, n ); - } - - inline void compute_functions4_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - STT f; - uint32_t const num_iset_vars = n - best_free_set; - uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); - - assert( num_iset_vars > 6 ); - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; - } - - /* count the number of support variables */ - uint32_t support_vars1 = 0; - for ( uint32_t i = 0; i < num_iset_vars; ++i ) - { - support_vars1 += has_var( f, num_iset_vars, i ) ? 1 : 0; - rm_support[i] = permutations[i + best_free_set]; - } - - /* use a different set */ - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; - } - - uint32_t support_vars2 = 0; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - support_vars2 += has_var( f, num_iset_vars, i ) ? 1 : 0; - } - - rm_support_size = support_vars2; - if ( support_vars2 > support_vars1 ) - { - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - f._bits[i] = isets0[0]._bits[i] | isets1[1]._bits[i]; - } - std::swap( fs_fun[2], fs_fun[3] ); - rm_support_size = support_vars1; - } - - /* move variables */ - if ( rm_support_size < num_iset_vars ) - { - support_vars1 = 0; - for ( uint32_t i = 0; i < num_iset_vars; ++i ) - { - if ( !has_var( f, num_iset_vars, i ) ) - { - continue; - } - - if ( support_vars1 < i ) - { - swap_inplace_local( f, num_iset_vars, support_vars1, i ); - } - - rm_support[support_vars1] = permutations[i + best_free_set]; - ++support_vars1; - } - } - - remainder = f; - compute_composition( fs_fun, 2 ); - } - - inline void compute_functions4( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; - LTT f = isets0[0] | isets1[1]; - LTT care; - - assert( n - best_free_set <= 6 ); - care._bits = masks[n - best_free_set]; - - /* count the number of support variables */ - uint32_t support_vars1 = 0; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - support_vars1 += has_var6( f, care, i ) ? 1 : 0; - supports[0][i] = permutations[i + best_free_set]; - } - - /* use a different set */ - f = isets0[0] | isets1[0]; - - uint32_t support_vars2 = 0; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - support_vars2 += has_var6( f, care, i ) ? 1 : 0; - } - - support_sizes[0] = support_vars2; - if ( support_vars2 > support_vars1 ) - { - f = isets0[0] | isets1[1]; - std::swap( fs_fun[2], fs_fun[3] ); - support_sizes[0] = support_vars1; - } - - /* move variables */ - if ( support_sizes[0] < n - best_free_set ) - { - support_vars1 = 0; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - if ( !has_var6( f, care, i ) ) - { - continue; - } - - if ( support_vars1 < i ) - { - kitty::swap_inplace( f, support_vars1, i ); - } - - supports[0][support_vars1] = permutations[i + best_free_set]; - ++support_vars1; - } - } - - dec_funcs[0] = f._bits; - compute_composition( fs_fun, 1 ); - } - - inline void compute_functions3_top( STT isets0[2], STT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - STT f, care; - uint32_t const num_iset_vars = n - best_free_set; - uint32_t const num_blocks = 1u << ( num_iset_vars - 6 ); - - assert( num_iset_vars > 6 ); - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - f._bits[i] = isets0[0]._bits[i] | isets1[0]._bits[i]; - } - - assert( n - best_free_set <= 6 ); - - /* init the care set */ - if ( best_multiplicity0 == 1 ) - { - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - care._bits[i] = ~( isets0[0]._bits[i] ); - } - fs_fun[1] = fs_fun[0]; - } - else - { - for ( uint32_t i = 0; i < num_blocks; ++i ) - { - care._bits[i] = ~( isets1[0]._bits[i] ); - } - fs_fun[3] = fs_fun[2]; - } - - /* count the number of support variables */ - uint32_t support_vars = 0; - for ( uint32_t i = 0; i < num_iset_vars; ++i ) - { - if ( !has_var_support( f, care, num_iset_vars, i ) ) - { - adjust_truth_table_on_dc( f, care, n, i ); - continue; - } - - if ( support_vars < i ) - { - kitty::swap_inplace( f, support_vars, i ); - } - - rm_support[support_vars] = permutations[i + best_free_set]; - ++support_vars; - } - - rm_support_size = support_vars; - remainder = f; - compute_composition( fs_fun, 2 ); - } - - inline void compute_functions3( LTT isets0[2], LTT isets1[2], uint64_t fs_fun[4], uint32_t n ) - { - uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX }; - LTT f = isets0[0] | isets1[0]; - LTT care; - - assert( n - best_free_set <= 6 ); - - /* init the care set */ - if ( best_multiplicity0 == 1 ) - { - care._bits = masks[n - best_free_set] & ( ~isets0[0]._bits ); - fs_fun[1] = fs_fun[0]; - } - else - { - care._bits = masks[n - best_free_set] & ( ~isets1[0]._bits ); - fs_fun[3] = fs_fun[2]; - } - - /* count the number of support variables */ - uint32_t support_vars = 0; - for ( uint32_t i = 0; i < n - best_free_set; ++i ) - { - if ( !has_var6( f, care, i ) ) - { - adjust_truth_table_on_dc6( f, care, i ); - continue; - } - - if ( support_vars < i ) - { - kitty::swap_inplace( f, support_vars, i ); - } - - supports[0][support_vars] = i; - ++support_vars; - } - - support_sizes[0] = support_vars; - dec_funcs[0] = f._bits; - compute_composition( fs_fun, 1 ); - } - - void compute_composition( uint64_t fs_fun[4], uint32_t index ) - { - dec_funcs[index] = fs_fun[0] << ( 1 << best_free_set ); - dec_funcs[index] |= fs_fun[1]; - - if ( best_multiplicity > 2 ) - { - dec_funcs[index] |= fs_fun[2] << ( ( 2 << best_free_set ) + ( 1 << best_free_set ) ); - dec_funcs[index] |= fs_fun[3] << ( 2 << best_free_set ); - } - - for ( uint32_t i = 0; i < best_free_set; ++i ) - { - supports[index][i] = permutations[i]; - } - support_sizes[index] = best_free_set; - } - - void fix_permutations_remainder( uint32_t n ) - { - for ( uint32_t i = 0; i < n; ++i ) - { - permutations[i] = rm_support[permutations[i]]; - } - } - - template - void local_extend_to( TT_type& tt, uint32_t real_num_vars ) - { - if ( real_num_vars < 6 ) - { - auto mask = *tt.begin(); - - for ( auto i = real_num_vars; i < num_vars; ++i ) - { - mask |= ( mask << ( 1 << i ) ); - } - - std::fill( tt.begin(), tt.end(), mask ); - } - else - { - uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) ); - auto it = tt.begin(); - while ( it != tt.end() ) - { - it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it ); - } - } - } - - void swap_inplace_local( STT& tt, uint32_t n, uint8_t var_index1, uint8_t var_index2 ) - { - if ( var_index1 == var_index2 ) - { - return; - } - - if ( var_index1 > var_index2 ) - { - std::swap( var_index1, var_index2 ); - } - - assert( n > 6 ); - const uint32_t num_blocks = 1 << ( n - 6 ); - - if ( var_index2 <= 5 ) - { - const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2]; - const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 ); - std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ), - [shift, &pmask]( uint64_t word ) { - return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift ); - } ); - } - else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */ - { - const auto step = 1 << ( var_index2 - 6 ); - const auto shift = 1 << var_index1; - auto it = std::begin( tt._bits ); - while ( it != std::begin( tt._bits ) + num_blocks ) - { - for ( auto i = decltype( step ){ 0 }; i < step; ++i ) - { - const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift; - const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1]; - *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low; - *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high; - } - it += 2 * step; - } - } - else - { - const auto step1 = 1 << ( var_index1 - 6 ); - const auto step2 = 1 << ( var_index2 - 6 ); - auto it = std::begin( tt._bits ); - while ( it != std::begin( tt._bits ) + num_blocks ) - { - for ( auto i = 0; i < step2; i += 2 * step1 ) - { - for ( auto j = 0; j < step1; ++j ) - { - std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) ); - } - } - it += 2 * step2; - } - } - } - - inline bool has_var6( const LTT& tt, const LTT& care, uint8_t var_index ) - { - if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 ) - { - return true; - } - - return false; - } - - inline bool has_var( const STT& tt, uint32_t n, uint8_t var_index ) - { - uint32_t const num_blocks = 1u << ( n - 6 ); - - if ( var_index < 6 ) - { - return std::any_of( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, - [var_index]( uint64_t word ) { return ( ( word >> ( uint64_t( 1 ) << var_index ) ) & kitty::detail::projections_neg[var_index] ) != - ( word & kitty::detail::projections_neg[var_index] ); } ); - } - - const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < num_blocks; i += 2 * step ) - { - for ( auto j = 0; j < step; ++j ) - { - if ( tt._bits[i + j] != tt._bits[i + j + step] ) - { - return true; - } - } - } - return false; - } - - bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index ) - { - assert( var_index < real_num_vars ); - assert( real_num_vars <= tt.num_vars() ); - assert( tt.num_vars() == care.num_vars() ); - - const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) ); - if ( real_num_vars <= 6 || var_index < 6 ) - { - auto it_tt = std::begin( tt._bits ); - auto it_care = std::begin( care._bits ); - while ( it_tt != std::begin( tt._bits ) + num_blocks ) - { - if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 ) - { - return true; - } - ++it_tt; - ++it_care; - } - - return false; - } - - const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < num_blocks; i += 2 * step ) - { - for ( auto j = 0; j < step; ++j ) - { - if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 ) - { - return true; - } - } - } - - return false; - } - - void adjust_truth_table_on_dc6( LTT& tt, LTT& care, uint32_t var_index ) - { - uint64_t new_bits = tt._bits & care._bits; - tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | - ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); - care._bits = care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ); - } - - void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t n, uint32_t var_index ) - { - assert( var_index < n ); - const uint32_t num_blocks = n <= 6 ? 1 : ( 1 << ( n - 6 ) ); - - if ( n <= 6 || var_index < 6 ) - { - auto it_tt = std::begin( tt._bits ); - auto it_care = std::begin( care._bits ); - while ( it_tt != std::begin( tt._bits ) + num_blocks ) - { - uint64_t new_bits = *it_tt & *it_care; - *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) | - ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] ); - *it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ); - - ++it_tt; - ++it_care; - } - return; - } - - const auto step = 1 << ( var_index - 6 ); - for ( auto i = 0u; i < num_blocks; i += 2 * step ) - { - for ( auto j = 0; j < step; ++j ) - { - tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] ); - tt._bits[i + j + step] = tt._bits[i + j]; - care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step]; - care._bits[i + j + step] = care._bits[i + j]; - } - } - } - - /* Decomposition format for ABC - * - * The record is an array of unsigned chars where: - * - the first unsigned char entry stores the number of unsigned chars in the record - * - the second entry stores the number of LUTs - * After this, several sub-records follow, each representing one LUT as follows: - * - an unsigned char entry listing the number of fanins - * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function - * have indexes from 0 to N-1, followed by the internal signals in a topological order - * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB. - * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char) - * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented - */ - // void get_decomposition_abc( unsigned char* decompArray ) - // { - // unsigned char* pArray = decompArray; - // unsigned char bytes = 2; - - // /* write number of LUTs */ - // pArray++; - // *pArray = 2; - // pArray++; - - // /* write BS LUT */ - // /* write fanin size */ - // *pArray = bs_support_size; - // pArray++; - // ++bytes; - - // /* write support */ - // for ( uint32_t i = 0; i < bs_support_size; ++i ) - // { - // *pArray = (unsigned char)permutations[bs_support[i] + best_free_set]; - // pArray++; - // ++bytes; - // } - - // /* write truth table */ - // uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) ); - // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) - // { - // *pArray = (unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF ); - // pArray++; - // ++bytes; - // } - - // /* write top LUT */ - // /* write fanin size */ - // uint32_t support_size = best_free_set + 1 + ( best_multiplicity > 2 ? 1 : 0 ); - // *pArray = support_size; - // pArray++; - // ++bytes; - - // /* write support */ - // for ( uint32_t i = 0; i < best_free_set; ++i ) - // { - // *pArray = (unsigned char)permutations[i]; - // pArray++; - // ++bytes; - // } - - // *pArray = (unsigned char)num_vars; - // pArray++; - // ++bytes; - - // if ( best_multiplicity > 2 ) - // { - // *pArray = (unsigned char)permutations[num_vars - 1]; - // pArray++; - // ++bytes; - // } - - // /* write truth table */ - // tt_num_bytes = ( support_size <= 3 ) ? 1 : ( 1 << ( support_size - 3 ) ); - // for ( uint32_t i = 0; i < tt_num_bytes; ++i ) - // { - // *pArray = (unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF ); - // pArray++; - // ++bytes; - // } - - // /* write numBytes */ - // *decompArray = bytes; - // } - - bool verify_impl() - { - /* create PIs */ - STT pis[max_num_vars]; - for ( uint32_t i = 0; i < num_vars; ++i ) - { - kitty::create_nth_var( pis[i], i ); - } - - STT bsi[6]; - STT bsf_sim; - for ( uint32_t lut_i = 0; lut_i < num_luts; ++lut_i ) - { - for ( uint32_t i = 0; i < support_sizes[lut_i]; ++i ) - { - bsi[i] = pis[supports[lut_i][i]]; - } - - STT top_sim; - for ( uint32_t i = 0u; i < ( 1 << num_vars ); ++i ) - { - uint32_t pattern = 0u; - for ( auto j = 0; j < support_sizes[lut_i]; ++j ) - { - pattern |= get_bit( bsi[j], i ) << j; - } - if ( lut_i != 0 ) - { - pattern |= get_bit( bsf_sim, i ) << support_sizes[lut_i]; - if ( shared_vars[lut_i - 1] < UINT32_MAX ) - { - pattern |= get_bit( pis[shared_vars[lut_i - 1]], i ) << ( support_sizes[lut_i] + 1 ); - } - } - if ( ( dec_funcs[lut_i] >> pattern ) & 1 ) - { - set_bit( top_sim, i ); - } - } - - bsf_sim = top_sim; - } - - /* extend function */ - local_extend_to( bsf_sim, num_vars ); - - for ( uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i ) - { - if ( bsf_sim._bits[i] != start_tt._bits[i] ) - { - std::cout << "Found incorrect decomposition\n"; - report_tt( bsf_sim ); - std::cout << " instead_of\n"; - report_tt( start_tt ); - return false; - } - } - - return true; - } - - uint32_t get_bit( const STT& tt, uint64_t index ) - { - return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1; - } - - void set_bit( STT& tt, uint64_t index ) - { - tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f ); - } - - void report_tt( STT const& stt ) - { - kitty::dynamic_truth_table tt( num_vars ); - - std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) ); - kitty::print_hex( tt ); - std::cout << "\n"; - } - -private: - uint32_t best_multiplicity{ UINT32_MAX }; - uint32_t best_free_set{ UINT32_MAX }; - uint32_t best_multiplicity0{ UINT32_MAX }; - uint32_t best_multiplicity1{ UINT32_MAX }; - uint32_t rm_support_size{ UINT32_MAX }; - uint32_t num_luts{ 0 }; - - STT start_tt; - STT best_tt; - STT remainder; - - uint64_t dec_funcs[3]; - uint32_t supports[3][6]; - uint32_t support_sizes[3] = { UINT32_MAX, UINT32_MAX, UINT32_MAX }; - uint32_t rm_support[15]; - uint32_t shared_vars[2]; - - uint32_t const num_vars; - bool const verify; - std::array permutations; -}; - -} // namespace acd - -ABC_NAMESPACE_CXX_HEADER_END - -#endif // _ACD666_H_ \ No newline at end of file From e8924e5534878acddb0f6005d20fbf45c85a0077 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 11 Apr 2024 15:44:52 +0200 Subject: [PATCH 09/11] Fixes and improvements --- src/map/if/acd/ac_wrapper.cpp | 135 ++++++++++++++++++---------------- src/map/if/acd/ac_wrapper.h | 3 +- src/map/if/acd/acd66.hpp | 17 ++++- src/map/if/acd/acdXX.hpp | 19 ++++- 4 files changed, 101 insertions(+), 73 deletions(-) diff --git a/src/map/if/acd/ac_wrapper.cpp b/src/map/if/acd/ac_wrapper.cpp index 7914bbd42..ad923edd2 100644 --- a/src/map/if/acd/ac_wrapper.cpp +++ b/src/map/if/acd/ac_wrapper.cpp @@ -23,94 +23,99 @@ ABC_NAMESPACE_IMPL_START -static constexpr bool use_generic_acd = true; - int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace acd; - if ( use_generic_acd ) - { - ac_decomposition_params ps; - ps.lut_size = lutSize; - ps.use_first = false; - ps.try_no_late_arrival = static_cast( try_no_late_arrival ); - ac_decomposition_stats st; + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.use_first = false; + ps.try_no_late_arrival = static_cast( try_no_late_arrival ); + ac_decomposition_stats st; - ac_decomposition_impl acd( nVars, ps, &st ); - int val = acd.run( pTruth, *pdelay ); + ac_decomposition_impl acd( nVars, ps, &st ); + int val = acd.run( pTruth, *pdelay ); - if ( val < 0 ) - { - *pdelay = 0; - return -1; - } + if ( val < 0 ) + { + *pdelay = 0; + return -1; + } - *pdelay = acd.get_profile(); - *cost = st.num_luts; + *pdelay = acd.get_profile(); + *cost = st.num_luts; - return val; - } - else - { - acd66_impl acd( nVars ); - int val = acd.run( pTruth, *pdelay ); + return val; +} - if ( val == 0 ) - { - *pdelay = 0; - return -1; - } +int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) +{ + using namespace acd; - *pdelay = acd.get_profile(); - *cost = 2; + ac_decomposition_params ps; + ps.lut_size = lutSize; + ps.use_first = true; + ac_decomposition_stats st; + + ac_decomposition_impl acd( nVars, ps, &st ); + acd.run( pTruth, *pdelay ); + int val = acd.compute_decomposition(); - return val; + if ( val < 0 ) + { + *pdelay = 0; + return -1; } + + *pdelay = acd.get_profile(); + acd.get_decomposition( decomposition ); + return 0; } -int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) +int acd2_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ) { using namespace acd; - if ( use_generic_acd ) + acdXX_params ps; + ps.lut_size = lutSize; + ps.max_shared_vars = lutSize - 2; + acdXX_impl acd( nVars, ps ); + int val = acd.run( pTruth, *pdelay ); + + if ( val == 0 ) { - ac_decomposition_params ps; - ps.lut_size = lutSize; - ps.use_first = true; - ac_decomposition_stats st; - - ac_decomposition_impl acd( nVars, ps, &st ); - acd.run( pTruth, *pdelay ); - int val = acd.compute_decomposition(); - - if ( val < 0 ) - { - *pdelay = 0; - return -1; - } - - *pdelay = acd.get_profile(); - acd.get_decomposition( decomposition ); - return 0; + *pdelay = 0; + return -1; } - else - { - acd66_impl acd( nVars ); - acd.run( pTruth, *pdelay ); - int val = acd.compute_decomposition(); - if ( val != 0 ) - { - *pdelay = 0; - return -1; - } + acd.compute_decomposition(); + *pdelay = acd.get_profile(); + *cost = 2; + + return val; +} - *pdelay = acd.get_profile(); +int acd2_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ) +{ + using namespace acd; - acd.get_decomposition( decomposition ); - return 0; + acdXX_params ps; + ps.lut_size = lutSize; + ps.max_shared_vars = lutSize - 2; + acdXX_impl acd( nVars, ps ); + acd.run( pTruth, *pdelay ); + int val = acd.compute_decomposition(); + + if ( val != 0 ) + { + *pdelay = 0; + return -1; } + + *pdelay = acd.get_profile(); + + acd.get_decomposition( decomposition ); + return 0; } inline int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ) diff --git a/src/map/if/acd/ac_wrapper.h b/src/map/if/acd/ac_wrapper.h index a4c4bff9b..edb45ca7f 100644 --- a/src/map/if/acd/ac_wrapper.h +++ b/src/map/if/acd/ac_wrapper.h @@ -34,8 +34,9 @@ ABC_NAMESPACE_HEADER_START int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); +int acd2_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); +int acd2_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); -int acd66_decompose( word * pTruth, unsigned nVars, unsigned char *decomposition ); int acdXX_decompose( word * pTruth, unsigned lutSize, unsigned nVars, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index 500dab361..ea7bfe951 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -138,9 +138,20 @@ class acd66_impl if ( best_multiplicity == UINT32_MAX ) return -1; - for ( uint32_t i = 0; i < best_free_set; ++i ) + if ( bs_support_size == UINT32_MAX ) + { + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + } + else { - profile |= 1 << permutations[i]; + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + profile |= 1 << permutations[bs_support[i] + best_free_set]; + } + profile = ~profile & ( ( 1u << num_vars ) - 1 ); } return profile; @@ -1074,7 +1085,7 @@ class acd66_impl ++bytes; /* write support */ - for ( uint32_t i = best_free_set; i < best_free_set; ++i ) + for ( uint32_t i = 0; i < best_free_set; ++i ) { *pArray = (unsigned char)permutations[i]; pArray++; diff --git a/src/map/if/acd/acdXX.hpp b/src/map/if/acd/acdXX.hpp index 74d5df8d9..8dabc725e 100644 --- a/src/map/if/acd/acdXX.hpp +++ b/src/map/if/acd/acdXX.hpp @@ -155,10 +155,21 @@ class acdXX_impl if ( best_multiplicity == UINT32_MAX ) return -1; - - for ( uint32_t i = 0; i < best_free_set; ++i ) + + if ( bs_support_size == UINT32_MAX ) { - profile |= 1 << permutations[i]; + for ( uint32_t i = 0; i < best_free_set; ++i ) + { + profile |= 1 << permutations[i]; + } + } + else + { + for ( uint32_t i = 0; i < bs_support_size; ++i ) + { + profile |= 1 << permutations[bs_support[i] + best_free_set]; + } + profile = ~profile & ( ( 1u << num_vars ) - 1 ); } return profile; @@ -1101,7 +1112,7 @@ class acdXX_impl ++bytes; /* write support */ - for ( uint32_t i = best_free_set; i < best_free_set; ++i ) + for ( uint32_t i = 0; i < best_free_set; ++i ) { *pArray = (unsigned char)permutations[i]; pArray++; From 6052d10fdec4884d5bfdb134a874264185ce0d94 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 11 Apr 2024 15:45:37 +0200 Subject: [PATCH 10/11] Adding new command if -U for 2-LUT decompositions under delay profile --- src/base/abci/abc.c | 21 ++++++++-- src/base/abci/abcIf.c | 27 ++++++++----- src/map/if/if.h | 6 ++- src/map/if/ifCore.c | 1 + src/map/if/ifCut.c | 2 +- src/map/if/ifDelay.c | 89 +++++++++++++++++++++++++++++++++++++++++++ src/map/if/ifMap.c | 11 ++++-- src/map/if/ifTime.c | 2 +- 8 files changed, 140 insertions(+), 19 deletions(-) diff --git a/src/base/abci/abc.c b/src/base/abci/abc.c index 36c8f698b..778e7f319 100644 --- a/src/base/abci/abc.c +++ b/src/base/abci/abc.c @@ -19514,7 +19514,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) If_ManSetDefaultPars( pPars ); pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut(); Extra_UtilGetoptReset(); - while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF ) + while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYUZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF ) { switch ( c ) { @@ -19630,6 +19630,18 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) if ( pPars->nAndDelay < 0 ) goto usage; break; + case 'U': + if ( globalUtilOptind >= argc ) + { + Abc_Print( -1, "Command line switch \"-U\" should be followed by a positive integer 3, 4, 5, or 6.\n" ); + goto usage; + } + pPars->nLutDecSize = atoi(argv[globalUtilOptind]); + pPars->fUserLut2D = 1; + globalUtilOptind++; + if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 ) + goto usage; + break; case 'Z': if ( globalUtilOptind >= argc ) { @@ -19892,7 +19904,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->fCutMin = 1; } - if ( pPars->fUserLutDec ) + if ( pPars->fUserLutDec || pPars->fUserLut2D ) { if ( pPars->nLutDecSize == 0 ) { @@ -19927,7 +19939,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) pPars->pLutLib = NULL; } // modify for delay optimization - if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fUserLutDec ) + if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fUserLutDec || pPars->fUserLut2D ) { pPars->fTruth = 1; pPars->fCutMin = 1; @@ -20073,7 +20085,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) sprintf(LutSize, "library" ); else sprintf(LutSize, "%d", pPars->nLutSize ); - Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" ); + Abc_Print( -2, "usage: if [-KCFAGRNTXYUZ num] [-DEW float] [-SJ str] [-qarlepmsdbgxyuojiktncvh]\n" ); Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" ); Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize ); Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax ); @@ -20085,6 +20097,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv ) Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" ); Abc_Print( -2, "\t-X num : delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay ); Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea ); + Abc_Print( -2, "\t-U num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" ); Abc_Print( -2, "\t-Z num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" ); Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", Buffer ); Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon ); diff --git a/src/base/abci/abcIf.c b/src/base/abci/abcIf.c index 357d7d83f..8c4ac1448 100644 --- a/src/base/abci/abcIf.c +++ b/src/base/abci/abcIf.c @@ -116,7 +116,7 @@ Abc_Ntk_t * Abc_NtkIf( Abc_Ntk_t * pNtk, If_Par_t * pPars ) pPars->pTimesReq = Abc_NtkGetCoRequiredFloats(pNtk); // update timing info to reflect logic level - if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec) && pNtk->pManTime ) + if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec || pPars->fUserLut2D ) && pNtk->pManTime ) { int c; if ( pNtk->AndGateDelay == 0.0 ) @@ -433,8 +433,8 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC SideEffects [] SeeAlso [] ***********************************************************************/ - void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop ) - { +void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop ) +{ extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory ); assert( !pIfMan->pPars->fUseTtPerm ); @@ -460,7 +460,15 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC // perform LUT-decomposition and return the LUT-structure unsigned char decompArray[92]; - int val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray ); + int val; + if ( pIfMan->pPars->fUserLutDec ) + { + val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray ); + } + else + { + val = acd2_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray ); + } assert( val == 0 ); // convert the LUT-structure into a set of logic nodes in Abc_Ntk_t @@ -473,7 +481,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC word *tt; Abc_Obj_t *pNewNodes[5]; - /* create intermediate LUTs*/ + /* create intermediate LUTs */ assert( decompArray[1] <= 6 ); Abc_Obj_t * pFanin; for ( i = 0; i < decompArray[1]; ++i ) @@ -537,7 +545,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC /* check correct read */ assert( byte_p == decompArray[0] ); - } +} /**Function************************************************************* @@ -577,14 +585,15 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t pNodeNew = Abc_NtkCreateNode( pNtkNew ); // if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays ) if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm && - !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) + !pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserLut2D && !pIfMan->pPars->fUserRecLib && + !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize ) If_CutRotatePins( pIfMan, pCutBest ); if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv ) { If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i ) Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) ); } - else if ( pIfMan->pPars->fUserLutDec ) + else if ( pIfMan->pPars->fUserLutDec || pIfMan->pPars->fUserLut2D ) { If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i ) Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover); @@ -642,7 +651,7 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t extern Hop_Obj_t * Abc_RecToHop3( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj ); pNodeNew->pData = Abc_RecToHop3( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj ); } - else if ( pIfMan->pPars->fUserLutDec ) + else if ( pIfMan->pPars->fUserLutDec || pIfMan->pPars->fUserLut2D ) { extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop ); Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew ); diff --git a/src/map/if/if.h b/src/map/if/if.h index 47fe109ca..723fda780 100644 --- a/src/map/if/if.h +++ b/src/map/if/if.h @@ -147,7 +147,8 @@ struct If_Par_t_ int fDeriveLuts; // enables deriving LUT structures int fDoAverage; // optimize average rather than maximum level int fHashMapping; // perform AIG hashing after mapping - int fUserLutDec; // perform AIG hashing after mapping + int fUserLutDec; // perform Boolean decomposition during mapping + int fUserLut2D; // perform Boolean decomposition during mapping int fVerbose; // the verbosity flag int fVerboseTrace; // the verbosity flag char * pLutStruct; // LUT structure @@ -573,6 +574,7 @@ extern int If_CutSopBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, extern int If_CutLutBalanceEval( If_Man_t * p, If_Cut_t * pCut ); extern int If_CutLutBalancePinDelays( If_Man_t * p, If_Cut_t * pCut, char * pPerm ); extern int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); +extern int If_Lut2DecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ); extern int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ); extern float If_LutDecPinRequired( If_Man_t * p, If_Cut_t * pCut, int i, float required ); /*=== ifDsd.c =============================================================*/ @@ -704,6 +706,8 @@ extern void If_ObjPrint( If_Obj_t * pObj ); extern int acd_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); extern int acd_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); +extern int acd2_evaluate( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned *cost, int try_no_late_arrival ); +extern int acd2_decompose( word * pTruth, unsigned nVars, int lutSize, unsigned *pdelay, unsigned char *decomposition ); ABC_NAMESPACE_HEADER_END diff --git a/src/map/if/ifCore.c b/src/map/if/ifCore.c index f7fcbca66..eed6a55e8 100644 --- a/src/map/if/ifCore.c +++ b/src/map/if/ifCore.c @@ -63,6 +63,7 @@ void If_ManSetDefaultPars( If_Par_t * pPars ) pPars->fCutMin = 0; pPars->fBidec = 0; pPars->fUserLutDec = 0; + pPars->fUserLut2D = 0; pPars->fVerbose = 0; } diff --git a/src/map/if/ifCut.c b/src/map/if/ifCut.c index 49850d313..c9354b339 100644 --- a/src/map/if/ifCut.c +++ b/src/map/if/ifCut.c @@ -761,7 +761,7 @@ void If_CutSort( If_Man_t * p, If_Set_t * pCutSet, If_Cut_t * pCut ) if ( !pCut->fUseless && (p->pPars->fUseDsd || p->pPars->pFuncCell2 || p->pPars->fUseBat || - p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + p->pPars->pLutStruct || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUserLut2D || p->pPars->fEnableCheck07 || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->fUseDsdTune || p->pPars->fEnableCheck75 || p->pPars->fEnableCheck75u || p->pPars->fUseCheck1 || p->pPars->fUseCheck2) ) { diff --git a/src/map/if/ifDelay.c b/src/map/if/ifDelay.c index 3514327c1..0f94de49a 100644 --- a/src/map/if/ifDelay.c +++ b/src/map/if/ifDelay.c @@ -505,6 +505,95 @@ int If_LutDecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, return DelayMax + val; } +int If_Lut2DecEval( If_Man_t * p, If_Cut_t * pCut, If_Obj_t * pObj, int optDelay, int fFirst ) +{ + pCut->fUser = 1; + pCut->Cost = pCut->nLeaves > 1 ? 1 : 0; + pCut->decDelay = 0; + if ( pCut->nLeaves == 0 ) // const + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 0 ); + return 0; + } + if ( pCut->nLeaves == 1 ) // variable + { + assert( Abc_Lit2Var(If_CutTruthLit(pCut)) == 1 ); + return (int)If_ObjCutBest(If_CutLeaf(p, pCut, 0))->Delay; + } + + int LutSize = p->pPars->nLutDecSize; + int i, leaf_delay; + int DelayMax = -1, nLeafMax = 0; + unsigned uLeafMask = 0; + for ( i = 0; i < If_CutLeaveNum(pCut); i++ ) + { + leaf_delay = If_ObjCutBest(If_CutLeaf(p, pCut, i))->Delay; + + if ( DelayMax < leaf_delay ) + { + DelayMax = leaf_delay; + nLeafMax = 1; + uLeafMask = (1 << i); + } + else if ( DelayMax == leaf_delay ) + { + nLeafMax++; + uLeafMask |= (1 << i); + } + } + if ( If_CutLeaveNum(pCut) <= LutSize ) + { + pCut->decDelay = ( 1 << LutSize ) - 1; + return DelayMax + 1; + } + + /* compute the decomposition */ + int use_late_arrival = 0; + unsigned cost = 1; + + if ( !fFirst ) + { + if ( optDelay ) + { + /* checks based on delay: must be better than the previous best cut */ + use_late_arrival = DelayMax + 2 >= If_ObjCutBest(pObj)->Delay; + } + else + { + /* checks based on delay: look at the required time */ + use_late_arrival = DelayMax + 2 > pObj->Required + p->fEpsilon; + } + } + + /* Too many late-arriving signals */ + if ( nLeafMax == LutSize && use_late_arrival ) + { + /* unfeasible decomposition */ + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + if ( !use_late_arrival ) + { + uLeafMask = 0; + } + + /* returns the delay of the decomposition */ + word *pTruth = If_CutTruthW( p, pCut ); + int val = acd2_evaluate( pTruth, pCut->nLeaves, LutSize, &uLeafMask, &cost, !use_late_arrival ); + + /* not feasible decomposition */ + pCut->decDelay = uLeafMask; + if ( val < 0 ) + { + pCut->Cost = IF_COST_MAX; + return ABC_INFINITY; + } + + pCut->Cost = 2; + return DelayMax + val; +} + int If_LutDecReEval( If_Man_t * p, If_Cut_t * pCut ) { // pCut->fUser = 1; diff --git a/src/map/if/ifMap.c b/src/map/if/ifMap.c index bdd3ae439..0ad63f050 100644 --- a/src/map/if/ifMap.c +++ b/src/map/if/ifMap.c @@ -166,7 +166,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep If_Cut_t * pCut0R, * pCut1R; int fFunc0R, fFunc1R; int i, k, v, iCutDsd, fChange; - int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || + int fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUserLutDec || p->pPars->fUserLut2D || p->pPars->fUseDsdTune || p->pPars->fUseCofVars || p->pPars->fUseAndVars || p->pPars->fUse34Spec || p->pPars->pLutStruct || p->pPars->pFuncCell2 || p->pPars->fUseCheck1 || p->pPars->fUseCheck2; int fUseAndCut = (p->pPars->nAndDelay > 0) || (p->pPars->nAndArea > 0); assert( !If_ObjIsAnd(pObj->pFanin0) || pObj->pFanin0->pCutSet->nCuts > 0 ); @@ -208,7 +208,7 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->fUseless = 1; } } - else if ( p->pPars->fUserLutDec ) + else if ( p->pPars->fUserLutDec || p->pPars->fUserLut2D ) { pCut->Delay = If_LutDecReEval( p, pCut ); } @@ -434,6 +434,11 @@ void If_ObjPerformMappingAnd( If_Man_t * p, If_Obj_t * pObj, int Mode, int fPrep pCut->Delay = If_LutDecEval( p, pCut, pObj, Mode == 0, fFirst ); pCut->fUseless = pCut->Delay == ABC_INFINITY; } + else if ( p->pPars->fUserLut2D ) + { + pCut->Delay = If_Lut2DecEval( p, pCut, pObj, Mode == 0, fFirst ); + pCut->fUseless = pCut->Delay == ABC_INFINITY; + } else if ( p->pPars->fUserSesLib ) { int Cost = 0; @@ -518,7 +523,7 @@ void If_ObjPerformMappingChoice( If_Man_t * p, If_Obj_t * pObj, int Mode, int fP If_Set_t * pCutSet; If_Obj_t * pTemp; If_Cut_t * pCutTemp, * pCut; - int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fUserLutDec; + int i, fSave0 = p->pPars->fDelayOpt || p->pPars->fDelayOptLut || p->pPars->fDsdBalance || p->pPars->fUserRecLib || p->pPars->fUserSesLib || p->pPars->fUse34Spec || p->pPars->fUserLutDec || p->pPars->fUserLut2D; assert( pObj->pEquiv != NULL ); // prepare diff --git a/src/map/if/ifTime.c b/src/map/if/ifTime.c index f20842384..a1b51d6ad 100644 --- a/src/map/if/ifTime.c +++ b/src/map/if/ifTime.c @@ -211,7 +211,7 @@ void If_CutPropagateRequired( If_Man_t * p, If_Obj_t * pObj, If_Cut_t * pCut, fl pLeaf->Required = IF_MIN( pLeaf->Required, Required - pLutDelays[0] ); } } - else if ( p->pPars->fUserLutDec ) + else if ( p->pPars->fUserLutDec || p->pPars->fUserLut2D ) { Required = ObjRequired; If_CutForEachLeaf( p, pCut, pLeaf, i ) From 0c905f873b57fa3e6e6744cd49301f51bfd57f48 Mon Sep 17 00:00:00 2001 From: aletempiac Date: Thu, 11 Apr 2024 19:01:05 +0200 Subject: [PATCH 11/11] Fixes --- src/map/if/acd/acd66.hpp | 4 ++++ src/map/if/acd/acdXX.hpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/map/if/acd/acd66.hpp b/src/map/if/acd/acd66.hpp index ea7bfe951..109512986 100644 --- a/src/map/if/acd/acd66.hpp +++ b/src/map/if/acd/acd66.hpp @@ -555,6 +555,7 @@ class acd66_impl } } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + best_multiplicity = UINT32_MAX; return false; } @@ -602,6 +603,8 @@ class acd66_impl return true; } } + + best_multiplicity = UINT32_MAX; return false; } @@ -665,6 +668,7 @@ class acd66_impl } } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + best_multiplicity = UINT32_MAX; return false; } diff --git a/src/map/if/acd/acdXX.hpp b/src/map/if/acd/acdXX.hpp index 8dabc725e..e43404ea9 100644 --- a/src/map/if/acd/acdXX.hpp +++ b/src/map/if/acd/acdXX.hpp @@ -571,6 +571,7 @@ class acdXX_impl } } while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) ); + best_multiplicity = UINT32_MAX; return false; } @@ -620,6 +621,8 @@ class acdXX_impl return true; } } + + best_multiplicity = UINT32_MAX; return false; } @@ -683,6 +686,7 @@ class acdXX_impl } } while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) ); + best_multiplicity = UINT32_MAX; return false; }