Skip to content

Commit

Permalink
Merge pull request #284 from aletempiac/acd66
Browse files Browse the repository at this point in the history
LUT structure mapping
  • Loading branch information
alanminko authored Apr 11, 2024
2 parents ca78f5e + 045803d commit b10d000
Show file tree
Hide file tree
Showing 15 changed files with 1,603 additions and 1,686 deletions.
25 changes: 19 additions & 6 deletions src/base/abci/abc.c
Original file line number Diff line number Diff line change
Expand Up @@ -19541,7 +19541,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
If_ManSetDefaultPars( pPars );
pPars->pLutLib = (If_LibLut_t *)Abc_FrameReadLibLut();
Extra_UtilGetoptReset();
while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF )
while ( ( c = Extra_UtilGetopt( argc, argv, "KCFAGRNTXYUZDEWSJqaflepmrsdbgxyuojiktncvh" ) ) != EOF )
{
switch ( c )
{
Expand Down Expand Up @@ -19657,6 +19657,18 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
if ( pPars->nAndDelay < 0 )
goto usage;
break;
case 'U':
if ( globalUtilOptind >= argc )
{
Abc_Print( -1, "Command line switch \"-U\" should be followed by a positive integer 3, 4, 5, or 6.\n" );
goto usage;
}
pPars->nLutDecSize = atoi(argv[globalUtilOptind]);
pPars->fUserLut2D = 1;
globalUtilOptind++;
if ( pPars->nLutDecSize < 3 || pPars->nLutDecSize > 6 )
goto usage;
break;
case 'Z':
if ( globalUtilOptind >= argc )
{
Expand Down Expand Up @@ -19727,7 +19739,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
globalUtilOptind++;
if ( strlen(pPars->pLutStruct) != 2 && strlen(pPars->pLutStruct) != 3 )
{
Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2- or 3-char string (e.g. \"66\" or \"666\").\n" );
Abc_Print( -1, "Command line switch \"-J\" should be followed by a 2-char string (e.g. \"44\" or \"66\").\n" );
goto usage;
}
break;
Expand Down Expand Up @@ -19910,7 +19922,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
}
if ( pPars->fEnableStructN )
{
pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheck66;
pPars->pFuncCell = pPars->fDelayOptLut ? NULL : If_CutPerformCheckXX;
}
else
{
Expand All @@ -19919,7 +19931,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
pPars->fCutMin = 1;
}

if ( pPars->fUserLutDec )
if ( pPars->fUserLutDec || pPars->fUserLut2D )
{
if ( pPars->nLutDecSize == 0 )
{
Expand Down Expand Up @@ -19954,7 +19966,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
pPars->pLutLib = NULL;
}
// modify for delay optimization
if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fUserLutDec )
if ( pPars->fDelayOpt || pPars->fDsdBalance || pPars->fDelayOptLut || pPars->fUserLutDec || pPars->fUserLut2D )
{
pPars->fTruth = 1;
pPars->fCutMin = 1;
Expand Down Expand Up @@ -20100,7 +20112,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
sprintf(LutSize, "library" );
else
sprintf(LutSize, "%d", pPars->nLutSize );
Abc_Print( -2, "usage: if [-KCFAGRNTXYZ num] [-DEW float] [-S str] [-qarlepmsdbgxyuojiktncvh]\n" );
Abc_Print( -2, "usage: if [-KCFAGRNTXYUZ num] [-DEW float] [-SJ str] [-qarlepmsdbgxyuojiktncvh]\n" );
Abc_Print( -2, "\t performs FPGA technology mapping of the network\n" );
Abc_Print( -2, "\t-K num : the number of LUT inputs (2 < num < %d) [default = %s]\n", IF_MAX_LUTSIZE+1, LutSize );
Abc_Print( -2, "\t-C num : the max number of priority cuts (0 < num < 2^12) [default = %d]\n", pPars->nCutsMax );
Expand All @@ -20112,6 +20124,7 @@ int Abc_CommandIf( Abc_Frame_t * pAbc, int argc, char ** argv )
Abc_Print( -2, "\t-T num : the type of LUT structures [default = any]\n" );
Abc_Print( -2, "\t-X num : delay of AND-gate in LUT library units [default = %d]\n", pPars->nAndDelay );
Abc_Print( -2, "\t-Y num : area of AND-gate in LUT library units [default = %d]\n", pPars->nAndArea );
Abc_Print( -2, "\t-U num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" );
Abc_Print( -2, "\t-Z num : the number of LUT inputs for delay-driven LUT decomposition [default = not used]\n" );
Abc_Print( -2, "\t-D float : sets the delay constraint for the mapping [default = %s]\n", Buffer );
Abc_Print( -2, "\t-E float : sets epsilon used for tie-breaking [default = %f]\n", pPars->Epsilon );
Expand Down
27 changes: 18 additions & 9 deletions src/base/abci/abcIf.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ Abc_Ntk_t * Abc_NtkIf( Abc_Ntk_t * pNtk, If_Par_t * pPars )
pPars->pTimesReq = Abc_NtkGetCoRequiredFloats(pNtk);

// update timing info to reflect logic level
if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec) && pNtk->pManTime )
if ( (pPars->fDelayOpt || pPars->fDsdBalance || pPars->fUserRecLib || pPars->fUserSesLib || pPars->fUserLutDec || pPars->fUserLut2D ) && pNtk->pManTime )
{
int c;
if ( pNtk->AndGateDelay == 0.0 )
Expand Down Expand Up @@ -433,8 +433,8 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC
SideEffects []
SeeAlso []
***********************************************************************/
void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop )
{
void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCutBest, If_Obj_t * pIfObj, Vec_Int_t * vCover, Abc_Obj_t * pNodeTop )
{
extern Hop_Obj_t * Kit_TruthToHop( Hop_Man_t * pMan, unsigned * pTruth, int nVars, Vec_Int_t * vMemory );
assert( !pIfMan->pPars->fUseTtPerm );

Expand All @@ -460,7 +460,15 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC

// perform LUT-decomposition and return the LUT-structure
unsigned char decompArray[92];
int val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray );
int val;
if ( pIfMan->pPars->fUserLutDec )
{
val = acd_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray );
}
else
{
val = acd2_decompose( pTruth, pCutBest->nLeaves, pIfMan->pPars->nLutDecSize, &(delayProfile), decompArray );
}
assert( val == 0 );

// convert the LUT-structure into a set of logic nodes in Abc_Ntk_t
Expand All @@ -473,7 +481,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC
word *tt;
Abc_Obj_t *pNewNodes[5];

/* create intermediate LUTs*/
/* create intermediate LUTs */
assert( decompArray[1] <= 6 );
Abc_Obj_t * pFanin;
for ( i = 0; i < decompArray[1]; ++i )
Expand Down Expand Up @@ -537,7 +545,7 @@ Hop_Obj_t * Abc_NodeBuildFromMini( Hop_Man_t * pMan, If_Man_t * p, If_Cut_t * pC

/* check correct read */
assert( byte_p == decompArray[0] );
}
}

/**Function*************************************************************
Expand Down Expand Up @@ -577,14 +585,15 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t
pNodeNew = Abc_NtkCreateNode( pNtkNew );
// if ( pIfMan->pPars->pLutLib && pIfMan->pPars->pLutLib->fVarPinDelays )
if ( !pIfMan->pPars->fDelayOpt && !pIfMan->pPars->fDelayOptLut && !pIfMan->pPars->fDsdBalance && !pIfMan->pPars->fUseTtPerm &&
!pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserRecLib && !pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize )
!pIfMan->pPars->pLutStruct && !pIfMan->pPars->fUserLutDec && !pIfMan->pPars->fUserLut2D && !pIfMan->pPars->fUserRecLib &&
!pIfMan->pPars->fUserSesLib && !pIfMan->pPars->nGateSize )
If_CutRotatePins( pIfMan, pCutBest );
if ( pIfMan->pPars->fUseCnfs || pIfMan->pPars->fUseMv )
{
If_CutForEachLeafReverse( pIfMan, pCutBest, pIfLeaf, i )
Abc_ObjAddFanin( pNodeNew, Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover) );
}
else if ( pIfMan->pPars->fUserLutDec )
else if ( pIfMan->pPars->fUserLutDec || pIfMan->pPars->fUserLut2D )
{
If_CutForEachLeaf( pIfMan, pCutBest, pIfLeaf, i )
Abc_NodeFromIf_rec(pNtkNew, pIfMan, pIfLeaf, vCover);
Expand Down Expand Up @@ -642,7 +651,7 @@ Abc_Obj_t * Abc_NodeFromIf_rec( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Obj_t
extern Hop_Obj_t * Abc_RecToHop3( Hop_Man_t * pMan, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj );
pNodeNew->pData = Abc_RecToHop3( (Hop_Man_t *)pNtkNew->pManFunc, pIfMan, pCutBest, pIfObj );
}
else if ( pIfMan->pPars->fUserLutDec )
else if ( pIfMan->pPars->fUserLutDec || pIfMan->pPars->fUserLut2D )
{
extern void Abc_DecRecordToHop( Abc_Ntk_t * pNtkNew, If_Man_t * pIfMan, If_Cut_t * pCut, If_Obj_t * pIfObj, Vec_Int_t * vMemory, Abc_Obj_t * pNodeTop );
Abc_DecRecordToHop( pNtkNew, pIfMan, pCutBest, pIfObj, vCover, pNodeNew );
Expand Down
81 changes: 56 additions & 25 deletions src/map/if/acd/ac_decomposition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,12 @@ class ac_decomposition_impl
}
if ( late_arriving > ps.max_free_set_vars )
{
ps.max_free_set_vars = late_arriving;
return -1; /* on average avoiding this computation leads to better quality */
// ps.max_free_set_vars = late_arriving;
}

/* return a high cost if too many late arriving variables */
if ( late_arriving > ps.lut_size - 1 || late_arriving > ps.max_free_set_vars )
if ( late_arriving > ps.lut_size - 1 )
{
return -1;
}
Expand Down Expand Up @@ -231,7 +232,7 @@ class ac_decomposition_impl
best_cost = multiplicity + additional_cost;
best_free_set = i;

if ( !ps.use_first )
if ( !ps.use_first && multiplicity > 2 )
{
continue;
}
Expand Down Expand Up @@ -269,7 +270,7 @@ class ac_decomposition_impl
best_cost = multiplicity + additional_cost;
best_free_set = i;

if ( !ps.use_first )
if ( !ps.use_first && multiplicity > 2 )
{
continue;
}
Expand Down Expand Up @@ -477,6 +478,13 @@ class ac_decomposition_impl
pComb[i] = pInvPerm[i] = i;
}

/* early bail-out conditions */
uint32_t bail_multiplicity = 2;
if ( best_multiplicity < UINT32_MAX )
{
bail_multiplicity = ( best_multiplicity >> 1 ) + ( best_multiplicity & 1 );
}

/* enumerate combinations */
do
{
Expand All @@ -489,6 +497,11 @@ class ac_decomposition_impl
{
bestPerm[i] = pComb[i];
}

if ( best_cost <= bail_multiplicity )
{
break;
}
}
} while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );

Expand Down Expand Up @@ -632,13 +645,10 @@ class ac_decomposition_impl
uint32_t k = 0;
for ( uint32_t j = 0; j < num_vars - best_free_set; ++j )
{
if ( !kitty::has_var( tt, j ) )
continue;

if ( !kitty::has_var( tt, care, j ) )
{
/* fix truth table */
adjust_truth_table_on_dc( tt, care, j );
adjust_truth_table_on_dc( tt, care, tt.num_vars(), j );
continue;
}

Expand Down Expand Up @@ -788,7 +798,14 @@ class ac_decomposition_impl
}
}
support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
generate_support_minimization_encodings_rec<false>( 0, 0, 0, count );
generate_support_minimization_encodings_rec<false, true>( 0, 0, 0, count );
}
else if ( best_multiplicity > 8 )
{
/* combinations are 2^(mu - 1) */
num_combs = 1u << ( best_multiplicity - 1 );
support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
generate_support_minimization_encodings_rec<false, false>( 0, 0, 0, count );
}
else
{
Expand All @@ -798,18 +815,18 @@ class ac_decomposition_impl
num_combs = ( num_combs << 1 ) + num_combs;
}
support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
generate_support_minimization_encodings_rec<true>( 0, 0, 0, count );
generate_support_minimization_encodings_rec<true, false>( 0, 0, 0, count );
}

assert( count == num_combs );
}

template<bool enable_dcset>
template<bool enable_dcset, bool equal_size_partition>
void generate_support_minimization_encodings_rec( uint32_t onset, uint32_t offset, uint32_t var, uint32_t& count )
{
if ( var == best_multiplicity )
{
if ( !enable_dcset )
if ( equal_size_partition )
{
/* sets must be equally populated */
if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) )
Expand All @@ -827,12 +844,12 @@ class ac_decomposition_impl
/* var in DCSET */
if ( enable_dcset )
{
generate_support_minimization_encodings_rec<enable_dcset>( onset, offset, var + 1, count );
generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count );
}

/* move var in ONSET */
onset |= 1 << var;
generate_support_minimization_encodings_rec<enable_dcset>( onset, offset, var + 1, count );
generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count );
onset &= ~( 1 << var );

/* remove symmetries */
Expand All @@ -843,7 +860,7 @@ class ac_decomposition_impl

/* move var in OFFSET */
offset |= 1 << var;
generate_support_minimization_encodings_rec<enable_dcset>( onset, offset, var + 1, count );
generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count );
offset &= ~( 1 << var );
}

Expand Down Expand Up @@ -1042,6 +1059,13 @@ class ac_decomposition_impl
for ( uint32_t j = 0; j < iset_support; ++j )
{
cost += has_var_support( tt, care, iset_support, j ) ? 1 : 0;
// if ( !has_var_support( tt, care, iset_support, j ) )
// {
// /* adjust truth table and care set */
// adjust_truth_table_on_dc( tt, care, iset_support, j );
// continue;
// }
// ++cost;
}

/* discard solutions with support over LUT size */
Expand Down Expand Up @@ -1186,7 +1210,7 @@ class ac_decomposition_impl
return res;
}

bool covering_improve( std::vector<encoding_column>& matrix, std::array<uint32_t, 6>& solution )
bool covering_improve( std::vector<encoding_column> const& matrix, std::array<uint32_t, 6>& solution )
{
/* performs one iteration of local search */
uint32_t best_cost = 0, local_cost = 0;
Expand Down Expand Up @@ -1233,21 +1257,23 @@ class ac_decomposition_impl
return improved;
}

void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t var_index )
void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t real_num_vars, uint32_t var_index )
{
assert( var_index < tt.num_vars() );
assert( var_index < real_num_vars );
assert( tt.num_vars() == care.num_vars() );

if ( tt.num_vars() <= 6 || var_index < 6 )
const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) );
if ( real_num_vars <= 6 || var_index < 6 )
{
auto it_tt = std::begin( tt._bits );
auto it_care = std::begin( care._bits );
while ( it_tt != std::end( tt._bits ) )
while ( it_tt != std::begin( tt._bits ) + num_blocks )
{
uint64_t new_bits = *it_tt & *it_care;
*it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) |
( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] );
*it_care = *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) );
*it_care = ( *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index];
*it_care = *it_care | ( *it_care << ( uint64_t( 1 ) << var_index ) );

++it_tt;
++it_care;
Expand All @@ -1256,7 +1282,7 @@ class ac_decomposition_impl
}

const auto step = 1 << ( var_index - 6 );
for ( auto i = 0u; i < static_cast<uint32_t>( tt.num_blocks() ); i += 2 * step )
for ( auto i = 0u; i < static_cast<uint32_t>( num_blocks ); i += 2 * step )
{
for ( auto j = 0; j < step; ++j )
{
Expand Down Expand Up @@ -1343,10 +1369,15 @@ class ac_decomposition_impl
std::swap( var_index1, var_index2 );
}

assert( num_vars > 6 );
const uint32_t num_blocks = 1 << ( num_vars - 6 );
const uint32_t num_blocks = num_vars <= 6 ? 1 : 1 << ( num_vars - 6 );

if ( var_index2 <= 5 )
if ( num_vars <= 6 )
{
const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift );
}
else if ( var_index2 <= 5 )
{
const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
Expand Down
Loading

0 comments on commit b10d000

Please sign in to comment.