diff --git a/.gitignore b/.gitignore index 9b148120..76b5902a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ qrc_* # CLion indexing *.uuid +.fleet # Generated files @@ -32,11 +33,13 @@ qrc_* # Visual Studio Code /.vscode +# Zed +/.cache* + # CLion *.idea *.clion* - # QtCreator CMakeLists.txt.user.* diff --git a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt index 2b37ed6f..1a58f36d 100644 --- a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt +++ b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt @@ -11,11 +11,6 @@ waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated NonUniformGridCPUBoundaryCollection.h NonUniformGridCPUInfoHeader.h) -waLBerla_add_executable( NAME NonUniformGridGenerator - FILES NonUniformGridGenerator.cpp LdcSetup.h - DEPENDS blockforest core field python_coupling ) - - waLBerla_add_executable( NAME NonUniformGridCPU - FILES NonUniformGridCPU.cpp LdcSetup.h + FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h DEPENDS blockforest boundary core domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridCPUGenerated ) diff --git a/apps/benchmarks/NonUniformGridCPU/GridGeneration.h b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h new file mode 100644 index 00000000..ec7069bc --- /dev/null +++ b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h @@ -0,0 +1,144 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \file GridGeneration.h +//! \author Markus Holzer +// +//====================================================================================================================== +#pragma once + +#include "blockforest/Initialization.h" +#include "blockforest/SetupBlock.h" +#include "blockforest/SetupBlockForest.h" +#include "blockforest/loadbalancing/StaticCurve.h" + +#include "core/Environment.h" +#include "core/logging/Initialization.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/timing/TimingPool.h" + +#include + +#include "LdcSetup.h" +#include "NonUniformGridCPUInfoHeader.h" + +using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; + +using namespace walberla; + +void createSetupBlockForest(SetupBlockForest& setupBfs, + const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup, + const bool useMPIManager=false) +{ + WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") + + Vector3 domainSize = domainSetup.getParameter >("domainSize"); + Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); + Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks"); + Vector3 periodic = domainSetup.getParameter >("periodic"); + + const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1)); + uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses"); + const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest"); + const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false); + const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false); + + if(useMPIManager) + numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses()); + + const LDC ldc(refinementDepth); + + auto refSelection = ldc.refinementSelector(); + setupBfs.addRefinementSelectionFunction(std::function(refSelection)); + const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]); + setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); + setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]); + setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses); + + if(mpi::MPIManager::instance()->numProcesses() > 1) + return; + + { + std::ostringstream oss; + oss << blockForestFilestem << ".bfs"; + setupBfs.saveToFile(oss.str().c_str()); + } + + if(writeVtk){ + setupBfs.writeVTKOutput(blockForestFilestem); + } + + if(outputStatistics){ + WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================"); + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) + for (uint_t level = 0; level <= refinementDepth; level++) + { + const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level); + WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks) + } + + const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses()); + WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc); + + const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]; + const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); + + const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q; + const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE); + const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type); + const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9; + const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9; + + WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)") + WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB") + WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB") + + WALBERLA_LOG_INFO_ON_ROOT("================================================================================="); + } +} + +void createBlockForest(shared_ptr< BlockForest >& bfs, + const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup) +{ + if (mpi::MPIManager::instance()->numProcesses() > 1) + { + const std::string blockForestFilestem = + blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest"); + // Load structured block forest from file + std::ostringstream oss; + oss << blockForestFilestem << ".bfs"; + const std::string setupBlockForestFilepath = oss.str(); + std::ifstream infile(setupBlockForestFilepath.c_str()); + if(!infile.good()) + { + WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!") + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } + else + { + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), + setupBlockForestFilepath.c_str(), false); + } + } + else + { + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, domainSetup, blockForestSetup); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } +} \ No newline at end of file diff --git a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h index 070656cb..6fe5a508 100644 --- a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h +++ b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h @@ -48,14 +48,8 @@ class LDCRefinement { const AABB & domain = forest.getDomain(); - const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 ); - const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 ); - - const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(), - domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() ); - - const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(), - domain.xMax(), domain.yMin() + ySize, domain.zMax() ); + const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() ); + const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() ); for(auto & block : forest) { diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp index 34fde13e..84b78291 100644 --- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp +++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp @@ -38,6 +38,7 @@ #include +#include "GridGeneration.h" #include "LdcSetup.h" #include "NonUniformGridCPUInfoHeader.h" #include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h" @@ -77,23 +78,25 @@ int main(int argc, char** argv) auto config = *cfg; logging::configureLogging(config); - + auto domainSetup = config->getOneBlock("DomainSetup"); auto blockForestSetup = config->getOneBlock("SetupBlockForest"); + const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true); + const std::string blockForestFilestem = blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest"); const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1)); - auto domainSetup = config->getOneBlock("DomainSetup"); Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); - // Load structured block forest from file - std::ostringstream oss; - oss << blockForestFilestem << ".bfs"; - const std::string setupBlockForestFilepath = oss.str(); + shared_ptr< BlockForest > bfs; + createBlockForest(bfs, domainSetup, blockForestSetup); + + if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1) + { + WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program") + return EXIT_SUCCESS; + } - WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...") - auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), - setupBlockForestFilepath.c_str(), false); auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); blocks->createCellBoundingBoxes(); @@ -173,6 +176,8 @@ int main(int argc, char** argv) const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false); const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false); + + auto finalDomain = blocks->getDomain(); if (vtkWriteFrequency > 0) { auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", @@ -180,6 +185,12 @@ int main(int argc, char** argv) auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel"); vtkOutput->addCellDataWriter(velWriter); + if (parameters.getParameter< bool >("writeOnlySlice", true)){ + const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth), + finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth)); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY)); + } + vtkOutput->addBeforeFunction([&]() { for (auto& block : *blocks) sweepCollection.calculateMacroscopicParameters(&block); @@ -236,6 +247,8 @@ int main(int argc, char** argv) pythonCallbackResults.data().exposeValue("numProcesses", performance.processes()); pythonCallbackResults.data().exposeValue("numThreads", performance.threads()); pythonCallbackResults.data().exposeValue("numCores", performance.cores()); + pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells()); + pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells()); pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerProcess", diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py index 25e93420..368fd569 100644 --- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py +++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py @@ -23,17 +23,23 @@ with CodeGeneration() as ctx: field_type = "float64" if ctx.double_accuracy else "float32" + cpu_vec = {"instruction_set": None} - streaming_pattern = 'aa' + streaming_pattern = 'esopull' timesteps = get_timesteps(streaming_pattern) stencil = LBStencil(Stencil.D3Q19) + method_enum = Method.CUMULANT + + fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False + collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower() assert stencil.D == 3, "This application supports only three-dimensional stencils" pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx') density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') macroscopic_fields = {'density': density_field, 'velocity': velocity_field} - lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, compressible=True, + lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True, + fourth_order_correction=fourth_order_correction, streaming_pattern=streaming_pattern) lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx") @@ -50,12 +56,12 @@ lbm_config=lbm_config, lbm_optimisation=lbm_opt, nonuniform=True, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, - target=ps.Target.CPU) + target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,) infoHeaderParams = { 'stencil': stencil.name.lower(), 'streaming_pattern': streaming_pattern, - 'collision_setup': lbm_config.method.name.lower(), + 'collision_setup': collision_setup, 'cse_global': int(lbm_opt.cse_global), 'cse_pdfs': int(lbm_opt.cse_pdfs), } diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp deleted file mode 100644 index d7eab304..00000000 --- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp +++ /dev/null @@ -1,98 +0,0 @@ -//====================================================================================================================== -// -// This file is part of waLBerla. waLBerla is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// waLBerla is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with waLBerla (see COPYING.txt). If not, see . -// -//! \file NonUniformGridGenerator.cpp -//! \author Frederik Hennig -// -//====================================================================================================================== - -#include "blockforest/Initialization.h" -#include "blockforest/SetupBlock.h" -#include "blockforest/SetupBlockForest.h" -#include "blockforest/loadbalancing/StaticCurve.h" - -#include "core/all.h" - -#include "python_coupling/CreateConfig.h" - -#include - -#include "LdcSetup.h" - -using namespace walberla; - - -int main(int argc, char ** argv){ - const mpi::Environment env(argc, argv); - mpi::MPIManager::instance()->useWorldComm(); - - if(mpi::MPIManager::instance()->numProcesses() > 1){ - WALBERLA_ABORT("Commandment: Thou shalt not run thy grid generator with more than one process."); - } - - for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) - { - auto config = *cfg; - auto domainSetup = config->getOneBlock("DomainSetup"); - - Vector3 domainSize = domainSetup.getParameter >("domainSize"); - Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks"); - Vector3 periodic = domainSetup.getParameter >("periodic"); - - auto blockForestSetup = config->getOneBlock("SetupBlockForest"); - const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1)); - const uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses"); - const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest"); - const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false); - const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false); - - const LDC ldc(refinementDepth); - SetupBlockForest setupBfs; - - auto refSelection = ldc.refinementSelector(); - setupBfs.addRefinementSelectionFunction(std::function(refSelection)); - const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]); - setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); - setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]); - setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses); - - { - std::ostringstream oss; - oss << blockForestFilestem << ".bfs"; - setupBfs.saveToFile(oss.str().c_str()); - } - - if(writeVtk){ - setupBfs.writeVTKOutput(blockForestFilestem); - } - - if(outputStatistics){ - WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================"); - WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) - for (uint_t level = 0; level <= refinementDepth; level++) - { - const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level); - WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks) - } - - const uint_t avgBlocksPerProc = setupBfs.getNumberOfBlocks() / setupBfs.getNumberOfProcesses(); - WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc); - WALBERLA_LOG_INFO_ON_ROOT("================================================================================="); - } - - - WALBERLA_LOG_INFO_ON_ROOT("Ending program") - } -} diff --git a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py index 51a0220b..108b431f 100644 --- a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py @@ -1,10 +1,25 @@ import waLBerla as wlb +from waLBerla.tools.config import block_decomposition from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle import sqlite3 import os import sys +try: + import machinestate as ms +except ImportError: + ms = None + DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3") +BENCHMARK = int(os.environ.get('BENCHMARK', 0)) + +WeakX = int(os.environ.get('WeakX', 128)) +WeakY = int(os.environ.get('WeakY', 128)) +WeakZ = int(os.environ.get('WeakZ', 128)) + +StrongX = int(os.environ.get('StrongX', 128)) +StrongY = int(os.environ.get('StrongY', 128)) +StrongZ = int(os.environ.get('StrongZ', 128)) class Scenario: @@ -18,7 +33,8 @@ def __init__(self, vtk_write_frequency=0, logger_frequency=0, blockforest_filestem="blockforest", - write_setup_vtk=False): + write_setup_vtk=True, + db_file_name=None): self.domain_size = domain_size self.root_blocks = root_blocks @@ -34,6 +50,8 @@ def __init__(self, self.vtk_write_frequency = vtk_write_frequency self.logger_frequency = logger_frequency + self.db_file_name = DB_FILE if db_file_name is None else db_file_name + self.config_dict = self.config(print_dict=False) @wlb.member_callback @@ -51,7 +69,8 @@ def config(self, print_dict=True): 'numProcesses': self.num_processes, 'blockForestFilestem': self.bfs_filestem, 'writeVtk': self.write_setup_vtk, - 'outputStatistics': False + 'outputStatistics': True, + 'writeSetupForestAndReturn': True, }, 'Parameters': { 'omega': 1.95, @@ -59,14 +78,15 @@ def config(self, print_dict=True): 'remainingTimeLoggerFrequency': self.logger_frequency, 'vtkWriteFrequency': self.vtk_write_frequency, 'useVTKAMRWriter': True, - 'oneFilePerProcess': False + 'oneFilePerProcess': False, + 'writeOnlySlice': False }, 'Logging': { 'logLevel': "info", } } - if (print_dict): + if print_dict: wlb.log_info_on_root("Scenario:\n" + pformat(config_dict)) return config_dict @@ -82,6 +102,15 @@ def results_callback(self, **kwargs): data['compile_flags'] = wlb.build_info.compiler_flags data['walberla_version'] = wlb.build_info.version data['build_machine'] = wlb.build_info.build_machine + + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + sequenceValuesToScalars(data) result = data @@ -92,52 +121,109 @@ def results_callback(self, **kwargs): table_name = table_name.replace("-", "_") for num_try in range(num_tries): try: - checkAndUpdateSchema(result, table_name, DB_FILE) - storeSingle(result, table_name, DB_FILE) + checkAndUpdateSchema(result, table_name, self.db_file_name) + storeSingle(result, table_name, self.db_file_name) break except sqlite3.OperationalError as e: wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}") -def validation_run(): - """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works""" - wlb.log_info_on_root("Validation run") +def weak_scaling_ldc(num_proc, uniform=False): + wlb.log_info_on_root("Running weak scaling benchmark...") - domain_size = (96, 96, 96) - cells_per_block = (32, 32, 32) + # This benchmark must run from 16 processes onwards + if wlb.mpi.numProcesses() > 1: + num_proc = wlb.mpi.numProcesses() + + if uniform: + factor = 3 * num_proc + name = "uniform" + else: + if num_proc % 16 != 0: + raise RuntimeError("Number of processes must be dividable by 16") + factor = int(num_proc // 16) + name = "nonuniform" + + cells_per_block = (WeakX, WeakY, WeakZ) + domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) scenarios = wlb.ScenarioManager() - scenario = Scenario(domain_size=domain_size, + scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}", + domain_size=domain_size, root_blocks=root_blocks, - num_processes=1, - refinement_depth=1, + num_processes=num_proc, cells_per_block=cells_per_block, - timesteps=201, - vtk_write_frequency=100, - logger_frequency=5, - write_setup_vtk=True) + refinement_depth=0 if uniform else 3, + timesteps=10, + db_file_name=f"weakScalingCPU{name}LDC.sqlite3") + scenarios.add(scenario) + + +def strong_scaling_ldc(num_proc, uniform=False): + wlb.log_info_on_root("Running strong scaling benchmark...") + + # This benchmark must run from 64 GPUs onwards + if wlb.mpi.numProcesses() > 1: + num_proc = wlb.mpi.numProcesses() + + if num_proc % 64 != 0: + raise RuntimeError("Number of processes must be dividable by 64") + + cells_per_block = (StrongX, StrongY, StrongZ) + + if uniform: + domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16) + name = "uniform" + else: + factor = int(num_proc / 64) + blocks64 = block_decomposition(factor) + cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))]) + domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor) + name = "nonuniform" + + root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) + + scenarios = wlb.ScenarioManager() + scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}", + domain_size=domain_size, + root_blocks=root_blocks, + num_processes=num_proc, + cells_per_block=cells_per_block, + refinement_depth=0 if uniform else 3, + timesteps=10, + db_file_name=f"strongScalingCPU{name}LDC.sqlite3") scenarios.add(scenario) -def scaling(): - wlb.log_info_on_root("Running scaling benchmark...") +def validation_run(): + """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works""" + wlb.log_info_on_root("Validation run") - numProc = wlb.mpi.numProcesses() + domain_size = (96, 96, 32) + cells_per_block = (32, 32, 32) - domain_size = (256, 256, 128 * numProc) - cells_per_block = (64, 64, 64) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) scenarios = wlb.ScenarioManager() scenario = Scenario(domain_size=domain_size, root_blocks=root_blocks, + num_processes=1, + refinement_depth=3, cells_per_block=cells_per_block, - refinement_depth=2, - timesteps=10) + timesteps=1001, + vtk_write_frequency=100, + logger_frequency=5, + write_setup_vtk=True) scenarios.add(scenario) -validation_run() -# scaling() +if BENCHMARK == 0: + validation_run() +elif BENCHMARK == 1: + weak_scaling_ldc(1, False) +elif BENCHMARK == 2: + strong_scaling_ldc(1, False) +else: + print(f"Invalid benchmark case {BENCHMARK}") diff --git a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt index f6b4e1ff..c8b02785 100644 --- a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt +++ b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt @@ -11,5 +11,5 @@ waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated NonUniformGridGPUBoundaryCollection.h NonUniformGridGPUInfoHeader.h) waLBerla_add_executable( NAME NonUniformGridGPU - FILES NonUniformGridGPU.cpp LdcSetup.h + FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridGPUGenerated ) \ No newline at end of file diff --git a/apps/benchmarks/NonUniformGridGPU/GridGeneration.h b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h new file mode 100644 index 00000000..5de0a45d --- /dev/null +++ b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h @@ -0,0 +1,139 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \file GridGeneration.h +//! \author Markus Holzer +// +//====================================================================================================================== +#pragma once + +#include "blockforest/Initialization.h" +#include "blockforest/SetupBlock.h" +#include "blockforest/SetupBlockForest.h" +#include "blockforest/loadbalancing/StaticCurve.h" + +#include "core/Environment.h" +#include "core/logging/Initialization.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/timing/TimingPool.h" + +#include + +#include "LdcSetup.h" +#include "NonUniformGridGPUInfoHeader.h" + +using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification; +using Stencil_T = StorageSpecification_T::Stencil; + +using namespace walberla; + +void createSetupBlockForest(SetupBlockForest& setupBfs, + const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup, + const bool useMPIManager=false) +{ + WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") + + Vector3 domainSize = domainSetup.getParameter >("domainSize"); + Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); + Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks"); + Vector3 periodic = domainSetup.getParameter >("periodic"); + + const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1)); + uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses"); + const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest"); + const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false); + const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false); + + if(useMPIManager) + numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses()); + + const LDC ldc(refinementDepth); + + auto refSelection = ldc.refinementSelector(); + setupBfs.addRefinementSelectionFunction(std::function(refSelection)); + const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]); + setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); + setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]); + setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses); + + if(mpi::MPIManager::instance()->numProcesses() > 1) + return; + + { + std::ostringstream oss; + oss << blockForestFilestem << ".bfs"; + setupBfs.saveToFile(oss.str().c_str()); + } + + if(writeVtk){ + setupBfs.writeVTKOutput(blockForestFilestem); + } + + if(outputStatistics){ + WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================"); + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) + for (uint_t level = 0; level <= refinementDepth; level++){ + const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level); + WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks) + } + + const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses()); + WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc); + + const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]; + const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); + + const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q; + const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE); + const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type); + const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9; + const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9; + + WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)") + WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB") + WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB") + + WALBERLA_LOG_INFO_ON_ROOT("================================================================================="); + } +} + +void createBlockForest(shared_ptr< BlockForest >& bfs, + const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup) +{ + if (mpi::MPIManager::instance()->numProcesses() > 1){ + const std::string blockForestFilestem = + blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest"); + // Load structured block forest from file + std::ostringstream oss; + oss << blockForestFilestem << ".bfs"; + const std::string setupBlockForestFilepath = oss.str(); + std::ifstream infile(setupBlockForestFilepath.c_str()); + if(!infile.good()){ + WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!") + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } + else{ + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), + setupBlockForestFilepath.c_str(), false); + } + } + else{ + SetupBlockForest setupBfs; + createSetupBlockForest(setupBfs, domainSetup, blockForestSetup); + bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); + } +} \ No newline at end of file diff --git a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h index 238943a7..b8431f2f 100644 --- a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h +++ b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h @@ -31,7 +31,9 @@ #include "field/FlagUID.h" using namespace walberla; + using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction; + using FlagField_T = FlagField< uint8_t >; class LDCRefinement @@ -46,14 +48,8 @@ class LDCRefinement { const AABB & domain = forest.getDomain(); - const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 ); - const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 ); - - const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(), - domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() ); - - const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(), - domain.xMax(), domain.yMin() + ySize, domain.zMax() ); + const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() ); + const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() ); for(auto & block : forest) { @@ -99,8 +95,7 @@ class LDC Cell globalCell(localCell); sbfs.transformBlockLocalToGlobalCell(globalCell, b); if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); } - else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 || - globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level))) + else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level))) { flagField->addFlag(localCell, noslipFlag); } diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp index 23310334..818f612b 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp @@ -19,7 +19,6 @@ //====================================================================================================================== #include "blockforest/Initialization.h" -#include "blockforest/loadbalancing/StaticCurve.h" #include "core/Environment.h" #include "core/logging/Initialization.h" @@ -34,67 +33,55 @@ #include "gpu/AddGPUFieldToStorage.h" #include "gpu/DeviceSelectMPI.h" -#include "gpu/FieldCopy.h" #include "gpu/ErrorChecking.h" +#include "gpu/FieldCopy.h" #include "gpu/HostFieldAllocator.h" #include "gpu/ParallelStreams.h" #include "gpu/communication/NonUniformGPUScheme.h" -#include "lbm_generated/evaluation/PerformanceEvaluation.h" -#include "lbm_generated/field/PdfField.h" -#include "lbm_generated/field/AddToStorage.h" -#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h" -#include "lbm_generated/gpu/GPUPdfField.h" -#include "lbm_generated/gpu/AddToStorage.h" -#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h" - #include "python_coupling/CreateConfig.h" +#include "python_coupling/DictWrapper.h" #include "python_coupling/PythonCallback.h" #include +#include "GridGeneration.h" #include "LdcSetup.h" #include "NonUniformGridGPUInfoHeader.h" +#include "lbm_generated/evaluation/PerformanceEvaluation.h" +#include "lbm_generated/field/AddToStorage.h" +#include "lbm_generated/field/PdfField.h" +#include "lbm_generated/gpu/AddToStorage.h" +#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h" +#include "lbm_generated/gpu/GPUPdfField.h" +#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h" using namespace walberla; using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification; -using Stencil_T = StorageSpecification_T::Stencil; +using Stencil_T = StorageSpecification_T::Stencil; using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil; -using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; -using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >; -using FlagField_T = FlagField< uint8_t >; +using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >; +using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >; +using FlagField_T = FlagField< uint8_t >; using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >; using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection; using gpu::communication::NonUniformGPUScheme; -namespace { -void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& ldcSetup, const uint_t numProcesses=uint_c(MPIManager::instance()->numProcesses())) { - Vector3 domainSize = domainSetup.getParameter >("domainSize"); - Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks"); - Vector3 periodic = domainSetup.getParameter >("periodic"); - - auto refSelection = ldcSetup.refinementSelector(); - setupBfs.addRefinementSelectionFunction(std::function(refSelection)); - const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]); - setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); - setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]); - setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses); -} -} - int main(int argc, char** argv) { const mpi::Environment env(argc, argv); mpi::MPIManager::instance()->useWorldComm(); gpu::selectDeviceBasedOnMpiRank(); + const std::string input_filename(argv[1]); + const bool inputIsPython = string_ends_with(input_filename, ".py"); + for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) { WALBERLA_MPI_WORLD_BARRIER() - WALBERLA_GPU_CHECK(gpuPeekAtLastError()) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -103,68 +90,32 @@ int main(int argc, char** argv) auto config = *cfg; logging::configureLogging(config); - auto domainSetup = config->getOneBlock("DomainSetup"); + auto domainSetup = config->getOneBlock("DomainSetup"); + auto blockForestSetup = config->getOneBlock("SetupBlockForest"); + const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true); + Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); // Reading parameters - auto parameters = config->getOneBlock("Parameters"); - const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4)); - const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1)); - const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50)); - const bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false); - const bool writeSetupForestAndReturn = parameters.getParameter< bool >("writeSetupForestAndReturn", false); - const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false); - const uint_t numProcesses = parameters.getParameter< uint_t >( "numProcesses"); - - auto ldc = std::make_shared< LDC >(refinementDepth ); - SetupBlockForest setupBfs; - if (writeSetupForestAndReturn) - { - WALBERLA_LOG_INFO_ON_ROOT("Creating SetupBlockForest for " << numProcesses << " processes") - WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") - createSetupBlockForest(setupBfs, domainSetup, *ldc, numProcesses); - - WALBERLA_ROOT_SECTION() { setupBfs.writeVTKOutput("SetupBlockForest"); } + auto parameters = config->getOneBlock("Parameters"); + const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95)); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50)); + const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false); - WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks()) - uint_t totalCellUpdates( 0.0 ); - for (uint_t level = 0; level <= refinementDepth; level++) - { - const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level); - const uint_t numberOfCells = numberOfBlocks * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]; - totalCellUpdates += timesteps * math::uintPow2(level) * numberOfCells; - WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks) - } - cudaDeviceProp prop{}; - WALBERLA_GPU_CHECK(gpuGetDeviceProperties(&prop, 0)) - - const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]; + shared_ptr< BlockForest > bfs; + createBlockForest(bfs, domainSetup, blockForestSetup); - const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q; - const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE); - const uint_t sizePerValue = sizeof(PdfField_T::value_type); - const double totalGPUMem = double_c(prop.totalGlobalMem) * 1e-9; - const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9; - - WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)") - WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB") - WALBERLA_LOG_INFO_ON_ROOT( "The total cell updates after " << timesteps << " timesteps (on the coarse level) will be " << totalCellUpdates) - WALBERLA_LOG_INFO_ON_ROOT( "Total GPU memory " << totalGPUMem) - - WALBERLA_LOG_INFO_ON_ROOT("Ending program") + if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1) + { + WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program") return EXIT_SUCCESS; } - WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...") - createSetupBlockForest(setupBfs, domainSetup, *ldc); - - // Create structured block forest - WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...") - auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs); - auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); + auto blocks = + std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]); blocks->createCellBoundingBoxes(); - WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks()) - for (uint_t level = 0; level <= refinementDepth; level++) + WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels") + for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++) { WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level)) } @@ -172,26 +123,35 @@ int main(int argc, char** argv) WALBERLA_LOG_INFO_ON_ROOT("Start field allocation") // Creating fields const StorageSpecification_T StorageSpec = StorageSpecification_T(); - auto allocator = make_shared< gpu::HostFieldAllocator >(); - const BlockDataID pdfFieldCpuID = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator); - const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator); - const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator); - const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3)); - - const BlockDataID pdfFieldGpuID = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true); + auto allocator = make_shared< gpu::HostFieldAllocator< real_t > >(); + const BlockDataID pdfFieldCpuID = + lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator); + const BlockDataID velFieldCpuID = + field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator); + const BlockDataID densityFieldCpuID = + field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator); + const BlockDataID flagFieldID = + field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3)); + + const BlockDataID pdfFieldGpuID = + lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true); const BlockDataID velFieldGpuID = gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true); const BlockDataID densityFieldGpuID = gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "velocity on GPU", true); WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation") - const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3 >("innerOuterSplit", Vector3(1, 1, 1))); - Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1)); - SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit); - for (auto& iBlock : *blocks) - { - sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr); + const Cell innerOuterSplit = + Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1))); + Vector3< int32_t > gpuBlockSize = + parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1)); + SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], + gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit); + + for (auto& iBlock : *blocks){ + sweepCollection.initialise(&iBlock, cell_idx_c(1)); } + sweepCollection.initialiseBlockPointer(); WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) WALBERLA_GPU_CHECK(gpuPeekAtLastError()) WALBERLA_MPI_BARRIER() @@ -200,9 +160,11 @@ int main(int argc, char** argv) /// LB SWEEPS AND BOUNDARY HANDLING /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + auto ldc = std::make_shared< LDC >(blocks->getDepth()); + const FlagUID fluidFlagUID("Fluid"); ldc->setupBoundaryFlagField(*blocks, flagFieldID); - geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0); BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID); ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -210,8 +172,8 @@ int main(int argc, char** argv) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...") - auto communication = std::make_shared< NonUniformGPUScheme > (blocks, cudaEnabledMPI); - auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication(blocks, pdfFieldGpuID); + auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI); + auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID); communication->addPackInfo(packInfo); WALBERLA_MPI_BARRIER() @@ -224,28 +186,31 @@ int main(int argc, char** argv) sweepCollection.setOuterPriority(streamHighPriority); auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); - lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo); + lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > + LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo); SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps); - // LBMMeshRefinement.test(5); - // return EXIT_SUCCESS; - - if(benchmarkKernelOnly){ - timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide"); - } - else{ - LBMMeshRefinement.addRefinementToTimeLoop(timeLoop); - } + LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0)); // VTK const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); - if (vtkWriteFrequency > 0) - { - auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", - "simulation_step", false, true, true, false, 0); + const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false); + const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false); + + auto finalDomain = blocks->getDomain(); + if (vtkWriteFrequency > 0){ + auto vtkOutput = + vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step", + false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess); auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel"); vtkOutput->addCellDataWriter(velWriter); + if (parameters.getParameter< bool >("writeOnlySlice", true)){ + const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()), + finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth())); + vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY)); + } + vtkOutput->addBeforeFunction([&]() { for (auto& block : *blocks) sweepCollection.calculateMacroscopicParameters(&block); @@ -260,17 +225,17 @@ int main(int argc, char** argv) auto remainingTimeLoggerFrequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds - if (remainingTimeLoggerFrequency > 0) - { + if (remainingTimeLoggerFrequency > 0){ auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency); timeLoop.addFuncAfterTimeStep(logger, "remaining time logger"); } - lbm_generated::PerformanceEvaluation const performance(blocks, flagFieldID, fluidFlagUID); - field::CellCounter< FlagField_T > fluidCells( blocks, flagFieldID, fluidFlagUID ); + lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID); + field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID); fluidCells(); - WALBERLA_LOG_INFO_ON_ROOT( "Non uniform Grid benchmark with " << fluidCells.numberOfCells() << " fluid cells (in total on all levels)") + WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells() + << " fluid cells (in total on all levels)") WcTimingPool timeloopTiming; WcTimer simTimer; @@ -294,6 +259,32 @@ int main(int argc, char** argv) const auto reducedTimeloopTiming = timeloopTiming.getReduced(); WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming) + + WALBERLA_ROOT_SECTION() + { + if (inputIsPython) + { + python_coupling::PythonCallback pythonCallbackResults("results_callback"); + if (pythonCallbackResults.isCallable()) + { + pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes()); + pythonCallbackResults.data().exposeValue("numThreads", performance.threads()); + pythonCallbackResults.data().exposeValue("numCores", performance.cores()); + pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells()); + pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells()); + pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time)); + pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time)); + pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time)); + pythonCallbackResults.data().exposeValue("stencil", infoStencil); + pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern); + pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup); + pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal); + pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs); + // Call Python function to report results + pythonCallbackResults(); + } + } + } } return EXIT_SUCCESS; } \ No newline at end of file diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py index d523b5c0..a1f5bff2 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py @@ -7,7 +7,7 @@ from lbmpy.advanced_streaming.utility import get_timesteps from lbmpy.boundaries import NoSlip, UBB from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule -from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil +from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel from pystencils_walberla import CodeGeneration, generate_info_header from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator @@ -30,20 +30,25 @@ const bool infoCseGlobal = {cse_global}; const bool infoCsePdfs = {cse_pdfs}; """ - + with CodeGeneration() as ctx: field_type = "float64" if ctx.double_accuracy else "float32" - streaming_pattern = 'pull' + streaming_pattern = 'esopull' timesteps = get_timesteps(streaming_pattern) stencil = LBStencil(Stencil.D3Q19) + method_enum = Method.CUMULANT + + fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False + collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower() assert stencil.D == 3, "This application supports only three-dimensional stencils" pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx') density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') macroscopic_fields = {'density': density_field, 'velocity': velocity_field} - lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, + lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True, + fourth_order_correction=fourth_order_correction, streaming_pattern=streaming_pattern) lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') @@ -66,7 +71,7 @@ infoHeaderParams = { 'stencil': stencil.name.lower(), 'streaming_pattern': streaming_pattern, - 'collision_setup': lbm_config.method.name.lower(), + 'collision_setup': collision_setup, 'cse_global': int(lbm_opt.cse_global), 'cse_pdfs': int(lbm_opt.cse_pdfs), } diff --git a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py index 34bc6caa..6f7b6820 100644 --- a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py @@ -1,16 +1,58 @@ import waLBerla as wlb +from waLBerla.tools.config import block_decomposition +from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle +import sqlite3 +import os +import sys + +try: + import machinestate as ms +except ImportError: + ms = None + +DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3") +BENCHMARK = int(os.environ.get('BENCHMARK', 0)) + +WeakX = int(os.environ.get('WeakX', 128)) +WeakY = int(os.environ.get('WeakY', 128)) +WeakZ = int(os.environ.get('WeakZ', 128)) + +StrongX = int(os.environ.get('StrongX', 128)) +StrongY = int(os.environ.get('StrongY', 128)) +StrongZ = int(os.environ.get('StrongZ', 128)) class Scenario: - def __init__(self, domain_size=(64, 64, 64), root_blocks=(2, 2, 2), - cells_per_block=(32, 32, 32), refinement_depth=0): + def __init__(self, + domain_size=(64, 64, 64), + root_blocks=(2, 2, 2), + num_processes=1, + refinement_depth=0, + cells_per_block=(32, 32, 32), + timesteps=101, + gpu_enabled_mpi=False, + vtk_write_frequency=0, + logger_frequency=30, + blockforest_filestem="blockforest", + write_setup_vtk=True, + db_file_name=None): self.domain_size = domain_size self.root_blocks = root_blocks self.cells_per_block = cells_per_block + self.periodic = (0, 0, 1) + self.refinement_depth = refinement_depth + self.num_processes = num_processes + self.bfs_filestem = blockforest_filestem + self.write_setup_vtk = write_setup_vtk + + self.timesteps = timesteps + self.gpu_enabled_mpi = gpu_enabled_mpi + self.vtk_write_frequency = vtk_write_frequency + self.logger_frequency = logger_frequency - self.periodic = (0, 0, 0) + self.db_file_name = DB_FILE if db_file_name is None else db_file_name self.config_dict = self.config(print_dict=False) @@ -22,39 +64,79 @@ def config(self, print_dict=True): 'domainSize': self.domain_size, 'rootBlocks': self.root_blocks, 'cellsPerBlock': self.cells_per_block, - 'periodic': self.periodic + 'periodic': self.periodic, + }, + 'SetupBlockForest': { + 'refinementDepth': self.refinement_depth, + 'numProcesses': self.num_processes, + 'blockForestFilestem': self.bfs_filestem, + 'writeVtk': self.write_setup_vtk, + 'outputStatistics': True, + 'writeSetupForestAndReturn': True, }, 'Parameters': { 'omega': 1.95, - 'timesteps': 30001, - - 'refinementDepth': self.refinement_depth, - 'writeSetupForestAndReturn': False, - 'numProcesses': 1, - - 'cudaEnabledMPI': False, - 'benchmarkKernelOnly': False, - - 'remainingTimeLoggerFrequency': 3, - - 'vtkWriteFrequency': 10000, + 'timesteps': self.timesteps, + 'remainingTimeLoggerFrequency': self.logger_frequency, + 'vtkWriteFrequency': self.vtk_write_frequency, + 'useVTKAMRWriter': True, + 'oneFilePerProcess': False, + 'writeOnlySlice': False, + 'gpuEnabledMPI': self.gpu_enabled_mpi, + 'gpuBlockSize': (128, 1, 1), }, 'Logging': { 'logLevel': "info", } } - if print_dict and config_dict["Parameters"]["writeSetupForestAndReturn"] is False: + if print_dict: wlb.log_info_on_root("Scenario:\n" + pformat(config_dict)) + return config_dict + @wlb.member_callback + def results_callback(self, **kwargs): + data = {} + data.update(self.config_dict['Parameters']) + data.update(self.config_dict['DomainSetup']) + data.update(kwargs) + + data['executable'] = sys.argv[0] + data['compile_flags'] = wlb.build_info.compiler_flags + data['walberla_version'] = wlb.build_info.version + data['build_machine'] = wlb.build_info.build_machine + + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + + sequenceValuesToScalars(data) + result = data + sequenceValuesToScalars(result) + num_tries = 4 + # check multiple times e.g. may fail when multiple benchmark processes are running + table_name = f"runs" + table_name = table_name.replace("-", "_") + for num_try in range(num_tries): + try: + checkAndUpdateSchema(result, table_name, self.db_file_name) + storeSingle(result, table_name, self.db_file_name) + break + except sqlite3.OperationalError as e: + wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}") + def validation_run(): """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works""" wlb.log_info_on_root("Validation run") - domain_size = (96, 96, 96) - cells_per_block = (32, 32, 32) + domain_size = (192, 192, 64) + cells_per_block = (64, 64, 64) root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) @@ -62,8 +144,91 @@ def validation_run(): scenario = Scenario(domain_size=domain_size, root_blocks=root_blocks, cells_per_block=cells_per_block, - refinement_depth=1) + timesteps=0, + vtk_write_frequency=0, + refinement_depth=3, + gpu_enabled_mpi=False) scenarios.add(scenario) -validation_run() +def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True): + wlb.log_info_on_root("Running weak scaling benchmark...") + + # This benchmark must run from 16 GPUs onwards + if wlb.mpi.numProcesses() > 1: + num_proc = wlb.mpi.numProcesses() + + if uniform: + factor = 3 * num_proc + name = "uniform" + else: + if num_proc % 16 != 0: + raise RuntimeError("Number of processes must be dividable by 16") + factor = int(num_proc // 16) + name = "nonuniform" + + cells_per_block = (WeakX, WeakY, WeakZ) + domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor) + + root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) + + scenarios = wlb.ScenarioManager() + scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}", + domain_size=domain_size, + root_blocks=root_blocks, + num_processes=num_proc, + cells_per_block=cells_per_block, + refinement_depth=0 if uniform else 3, + timesteps=10, + gpu_enabled_mpi=gpu_enabled_mpi, + db_file_name=f"weakScalingGPU{name}LDC.sqlite3") + scenarios.add(scenario) + + +def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True): + wlb.log_info_on_root("Running strong scaling benchmark...") + + # This benchmark must run from 64 GPUs onwards + if wlb.mpi.numProcesses() > 1: + num_proc = wlb.mpi.numProcesses() + + if num_proc % 64 != 0: + raise RuntimeError("Number of processes must be dividable by 64") + + cells_per_block = (StrongX, StrongY, StrongZ) + + if uniform: + domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16) + name = "uniform" + else: + factor = int(num_proc / 64) + blocks64 = block_decomposition(factor) + cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))]) + domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor) + name = "nonuniform" + + root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)]) + + scenarios = wlb.ScenarioManager() + scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}", + domain_size=domain_size, + root_blocks=root_blocks, + num_processes=num_proc, + cells_per_block=cells_per_block, + refinement_depth=0 if uniform else 3, + timesteps=10, + gpu_enabled_mpi=gpu_enabled_mpi, + db_file_name=f"strongScalingGPU{name}LDC.sqlite3") + scenarios.add(scenario) + + +if BENCHMARK == 0: + validation_run() +elif BENCHMARK == 1: + weak_scaling_ldc(1, True, False) +elif BENCHMARK == 2: + strong_scaling_ldc(1, True, False) +else: + print(f"Invalid benchmark case {BENCHMARK}") + + diff --git a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt index 1b530d61..bb199b8f 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt +++ b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt @@ -15,10 +15,10 @@ waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen if (WALBERLA_BUILD_WITH_GPU_SUPPORT ) waLBerla_add_executable(NAME benchmark_multiphase FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py - DEPENDS blockforest core gpu field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen) + DEPENDS blockforest core gpu field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen) else () waLBerla_add_executable(NAME benchmark_multiphase FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py - DEPENDS blockforest core field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen) + DEPENDS blockforest core field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen) endif (WALBERLA_BUILD_WITH_GPU_SUPPORT ) diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py index 88a410c1..a703e9df 100755 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py @@ -8,6 +8,11 @@ import sys from math import prod +try: + import machinestate as ms +except ImportError: + ms = None + def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8): """Checks if a single block of given size fits into GPU memory""" @@ -20,7 +25,9 @@ def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, s class Scenario: - def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256), + def __init__(self, time_step_strategy, + cuda_block_size, + cells_per_block=(256, 256, 256), cuda_enabled_mpi=False): # output frequencies self.vtkWriteFrequency = 0 @@ -89,6 +96,14 @@ def results_callback(self, **kwargs): data['compile_flags'] = wlb.build_info.compiler_flags data['walberla_version'] = wlb.build_info.version data['build_machine'] = wlb.build_info.build_machine + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + sequenceValuesToScalars(data) df = pd.DataFrame.from_records([data]) @@ -101,43 +116,19 @@ def results_callback(self, **kwargs): def benchmark(): scenarios = wlb.ScenarioManager() - gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8)) + gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40)) gpu_mem = gpu_mem_gb * (2 ** 30) - block_size = (256, 256, 256) + block_size = (320, 320, 320) + cuda_enabled_mpi = True if not domain_block_size_ok(block_size, gpu_mem): wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") else: - scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size)) - + scenarios.add(Scenario(time_step_strategy='normal', + cuda_block_size=(128, 1, 1), + cells_per_block=block_size, + cuda_enabled_mpi=cuda_enabled_mpi)) -def kernel_benchmark(): - scenarios = wlb.ScenarioManager() - - gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8)) - gpu_mem = gpu_mem_gb * (2 ** 30) - block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)] - - cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), - (32, 2, 1), (64, 2, 1), (128, 2, 1), - (32, 4, 1), (64, 4, 1), - (32, 4, 2), - (32, 8, 1), - (16, 16, 1)] - - for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']: - for cuda_block in cuda_blocks: - for block_size in block_sizes: - if not domain_block_size_ok(block_size, gpu_mem): - wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") - continue - scenario = Scenario(time_step_strategy=time_step_strategy, - cuda_block_size=cuda_block, - cells_per_block=block_size) - scenarios.add(scenario) - - -# benchmark() -kernel_benchmark() +benchmark() diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp index 2a59e6be..fac902a0 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp +++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp @@ -29,6 +29,7 @@ #include "field/vtk/VTKWriter.h" #include "geometry/InitBoundaryHandling.h" +#include "lbm_generated/evaluation/PerformanceEvaluation.h" #include "python_coupling/CreateConfig.h" #include "python_coupling/DictWrapper.h" @@ -78,14 +79,10 @@ int main(int argc, char** argv) logging::configureLogging(config); shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config); - Vector3< uint_t > cellsPerBlock = - config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock"); // Reading parameters auto parameters = config->getOneBlock("Parameters"); const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal"); const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50)); - const real_t remainingTimeLoggerFrequency = - parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0)); const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1)); const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2)); @@ -102,6 +99,7 @@ int main(int argc, char** argv) gpu::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true); BlockDataID phase_field_gpu = gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true); + BlockDataID phase_field_tmp = gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "temporary phasefield", true); #else BlockDataID lb_phase_field = field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_c(0.0), field::fzyx); @@ -109,6 +107,7 @@ int main(int argc, char** argv) field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_c(0.0), field::fzyx); BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx); BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_c(0.0), field::fzyx); + BlockDataID phase_field_tmp = field::addToStorage< PhaseField_T >(blocks, "phase tmp", real_c(0.0), field::fzyx); #endif if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only") @@ -139,47 +138,80 @@ int main(int argc, char** argv) pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu); pystencils::phase_field_LB_step phase_field_LB_step( - lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); + lb_phase_field_gpu, phase_field_gpu, phase_field_tmp, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); #else pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field); - pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field); + pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, phase_field_tmp, vel_field); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field); #endif // add communication #if defined(WALBERLA_BUILD_WITH_CUDA) - const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false); - auto Comm_velocity_based_distributions = - make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); - auto generatedPackInfo_velocity_based_distributions = - make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); - Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); + const bool gpuEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false); + const int streamLowPriority = 0; + const int streamHighPriority = 0; + auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); + auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority); + + auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field_gpu); + auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu); - Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field); - auto Comm_phase_field_distributions = - make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); - auto generatedPackInfo_phase_field_distributions = - make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu); - Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); -#else + auto UniformGPUSchemeVelocityBasedDistributions = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false); + auto UniformGPUSchemePhaseFieldDistributions = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false); + auto UniformGPUSchemePhaseField = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false, 65432); + + UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions); + UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions); + UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field); + + auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); }); + auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); }); - blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks); + auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); }); + auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); }); + auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); }); + + auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b) + { + auto phaseField = b->getData< gpu::GPUField >(phase_field_gpu); + auto phaseFieldTMP = b->getData< gpu::GPUField >(phase_field_tmp); + phaseField->swapDataPointers(phaseFieldTMP); + }); + +#else + + auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field); + auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field); - auto generatedPackInfo_velocity_based_distributions = - make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field); - Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field); - Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions); + auto UniformGPUSchemeVelocityBasedDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks); + auto UniformGPUSchemePhaseFieldDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks); + auto UniformGPUSchemePhaseField = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks, 65432); + + UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions); + UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions); + UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field); - blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks); - auto generatedPackInfo_phase_field_distributions = - make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field); - Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions); + auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); }); + auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); }); + + auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->communicate(); }); + auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); }); + auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); }); + + auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); }); + + auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b) + { + auto phaseField = b->getData< PhaseField_T >(phase_field); + auto phaseFieldTMP = b->getData< PhaseField_T >(phase_field_tmp); + phaseField->swapDataPointers(phaseFieldTMP); + }); #endif BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field"); @@ -201,99 +233,37 @@ int main(int argc, char** argv) init_h(&block); init_g(&block); } + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) + WALBERLA_MPI_BARRIER() WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done") } + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); #if defined(WALBERLA_BUILD_WITH_CUDA) - int const streamLowPriority = 0; - int const streamHighPriority = 0; - auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority); - auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority); -#endif + timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication") + << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step") + << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication"); - auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps); -#if defined(WALBERLA_BUILD_WITH_CUDA) - auto normalTimeStep = [&]() { - Comm_velocity_based_distributions->startCommunication(); - for (auto& block : *blocks) - phase_field_LB_step(&block, defaultStream); - Comm_velocity_based_distributions->wait(); + timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication") + << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step"); + timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField") + << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication"); + + timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field"); - Comm_phase_field_distributions->startCommunication(); - for (auto& block : *blocks) - hydro_LB_step(&block, defaultStream); - Comm_phase_field_distributions->wait(); - }; - auto phase_only = [&]() { - for (auto& block : *blocks) - phase_field_LB_step(&block); - }; - auto hydro_only = [&]() { - for (auto& block : *blocks) - hydro_LB_step(&block); - }; - auto without_comm = [&]() { - for (auto& block : *blocks) - phase_field_LB_step(&block); - for (auto& block : *blocks) - hydro_LB_step(&block); - }; #else - auto normalTimeStep = [&]() { - Comm_velocity_based_distributions.startCommunication(); - for (auto& block : *blocks) - phase_field_LB_step(&block); - Comm_velocity_based_distributions.wait(); - - Comm_phase_field_distributions.startCommunication(); - for (auto& block : *blocks) - hydro_LB_step(&block); - Comm_phase_field_distributions.wait(); - }; - auto phase_only = [&]() { - for (auto& block : *blocks) - phase_field_LB_step(&block); - }; - auto hydro_only = [&]() { - for (auto& block : *blocks) - hydro_LB_step(&block); - }; - auto without_comm = [&]() { - for (auto& block : *blocks) - phase_field_LB_step(&block); - for (auto& block : *blocks) - hydro_LB_step(&block); - }; -#endif - std::function< void() > timeStep; - if (timeStepStrategy == "phase_only") - { - timeStep = std::function< void() >(phase_only); - WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking") - } - else if (timeStepStrategy == "hydro_only") - { - timeStep = std::function< void() >(hydro_only); - WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking") - } - else if (timeStepStrategy == "kernel_only") - { - timeStep = std::function< void() >(without_comm); - WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking") - } - else - { - timeStep = std::function< void() >(normalTimeStep); - WALBERLA_LOG_INFO_ON_ROOT("normal timestep with overlapping") - } + timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication") + << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step") + << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication"); - timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step"); + timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication") + << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step"); + timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField") + << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication"); - // remaining time logger - if (remainingTimeLoggerFrequency > 0) - timeLoop->addFuncAfterTimeStep( - timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency), - "remaining time logger"); + timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field"); +#endif uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); if (vtkWriteFrequency > 1) @@ -307,40 +277,60 @@ int main(int argc, char** argv) auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase"); vtkOutput->addCellDataWriter(phaseWriter); - timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); } + lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID); + field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID); + fluidCells(); + + WALBERLA_LOG_INFO_ON_ROOT("Multiphase benchmark with " << fluidCells.numberOfCells() << " fluid cells") + WALBERLA_LOG_INFO_ON_ROOT("Running " << warmupSteps << " timesteps to warm up the system") + for (uint_t i = 0; i < warmupSteps; ++i) - timeLoop->singleStep(); + timeloop.singleStep(); + + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) + WALBERLA_MPI_BARRIER() + WALBERLA_LOG_INFO_ON_ROOT("Warmup timesteps done") - timeLoop->setCurrentTimeStepToZero(); + timeloop.setCurrentTimeStepToZero(); + WALBERLA_MPI_BARRIER() WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps") + WcTimingPool timeloopTiming; WcTimer simTimer; #if defined(WALBERLA_BUILD_WITH_CUDA) - cudaDeviceSynchronize(); + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) #endif simTimer.start(); - timeLoop->run(); + timeloop.run(timeloopTiming); #if defined(WALBERLA_BUILD_WITH_CUDA) - cudaDeviceSynchronize(); + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) #endif + WALBERLA_MPI_BARRIER() simTimer.end(); WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") - auto time = real_c(simTimer.last()); - auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); - auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6; - WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess) + double time = simTimer.max(); + WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); } + performance.logResultOnRoot(timesteps, time); + + const auto reducedTimeloopTiming = timeloopTiming.getReduced(); + WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming) + + WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << performance.mlupsPerProcess(timesteps, time)) WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s") WALBERLA_ROOT_SECTION() { python_coupling::PythonCallback pythonCallbackResults("results_callback"); if (pythonCallbackResults.isCallable()) { - pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess); + pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time)); pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase); pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro); #if defined(WALBERLA_BUILD_WITH_CUDA) - pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", cudaEnabledMpi); + pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", gpuEnabledMpi); #endif // Call Python function to report results pythonCallbackResults(); diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py index d0e676f5..30fe9297 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py @@ -19,7 +19,7 @@ field_type = "float64" if ctx.double_accuracy else "float32" stencil_phase = LBStencil(Stencil.D3Q15) - stencil_hydro = LBStencil(Stencil.D3Q27) + stencil_hydro = LBStencil(Stencil.D3Q19) assert (stencil_phase.D == stencil_hydro.D) ######################## @@ -76,13 +76,13 @@ delta_equilibrium=False, force=sp.symbols(f"F_:{stencil_phase.D}"), velocity_input=u, weighted=True, relaxation_rates=rates, - output={'density': C_tmp}, kernel_type='stream_pull_collide') + output={'density': C_tmp}) method_phase = create_lb_method(lbm_config=lbm_config_phase) lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, compressible=False, weighted=True, relaxation_rate=omega, force=sp.symbols(f"F_:{stencil_hydro.D}"), - output={'velocity': u}, kernel_type='collide_stream_push') + output={'velocity': u}) method_hydro = create_lb_method(lbm_config=lbm_config_hydro) # create the kernels for the initialization of the g and h field @@ -137,7 +137,8 @@ sweep_params = {'block_size': sweep_block_size} stencil_typedefs = {'Stencil_phase_T': stencil_phase, - 'Stencil_hydro_T': stencil_hydro} + 'Stencil_hydro_T': stencil_hydro, + 'Full_Stencil_T': LBStencil(Stencil.D3Q27)} field_typedefs = {'PdfField_phase_T': h, 'PdfField_hydro_T': g, 'VelocityField_T': u, @@ -156,7 +157,7 @@ generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU) generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, - field_swaps=[(h, h_tmp), (C, C_tmp)], + field_swaps=[(h, h_tmp)], inner_outer_split=True, cpu_vectorize_info=cpu_vec, target=Target.CPU) @@ -172,7 +173,7 @@ streaming_pattern='pull', target=Target.CPU) generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g, - streaming_pattern='push', target=Target.CPU) + streaming_pattern='pull', target=Target.CPU) generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU) @@ -183,7 +184,7 @@ g_updates, target=Target.GPU) generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, - field_swaps=[(h, h_tmp), (C, C_tmp)], + field_swaps=[(h, h_tmp)], target=Target.GPU, gpu_indexing_params=sweep_params, varying_parameters=vp) @@ -198,7 +199,7 @@ streaming_pattern='pull', target=Target.GPU) generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g, - streaming_pattern='push', target=Target.GPU) + streaming_pattern='pull', target=Target.GPU) generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU) diff --git a/apps/benchmarks/UniformGridCPU/CMakeLists.txt b/apps/benchmarks/UniformGridCPU/CMakeLists.txt index 8e9c1e7a..76c40ea5 100644 --- a/apps/benchmarks/UniformGridCPU/CMakeLists.txt +++ b/apps/benchmarks/UniformGridCPU/CMakeLists.txt @@ -3,14 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" ) waLBerla_link_files_to_builddir( "simulation_setup" ) -foreach(streaming_pattern pull push aa esotwist) +foreach(streaming_pattern pull push aa esotwist esopull esopush) foreach(stencil d3q19 d3q27) - foreach (collision_setup srt trt w-mrt r-w-mrt cm r-cm k r-k entropic smagorinsky) + foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr) # KBC methods only for D2Q9 and D3Q27 defined if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") continue() - endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") - + endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") + if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19") + continue() + endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19") set(config ${stencil}_${streaming_pattern}_${collision_setup}) waLBerla_generate_target_from_python(NAME UniformGridCPUGenerated_${config} FILE UniformGridCPU.py diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp index 4674cfae..a7eb9ecf 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp @@ -64,6 +64,9 @@ using SweepCollection_T = lbm::UniformGridCPUSweepCollection; using blockforest::communication::UniformBufferedScheme; +using macroFieldType = VelocityField_T::value_type; +using pdfFieldType = PdfField_T::value_type; + int main(int argc, char** argv) { const mpi::Environment env(argc, argv); @@ -87,10 +90,10 @@ int main(int argc, char** argv) // Creating fields const StorageSpecification_T StorageSpec = StorageSpecification_T(); - auto fieldAllocator = make_shared< field::AllocateAligned< real_t, 64 > >(); + auto fieldAllocator = make_shared< field::AllocateAligned< pdfFieldType, 64 > >(); const BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, field::fzyx, fieldAllocator); - const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx); - const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx); + const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx); + const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx); const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field"); // Initialize velocity on cpu @@ -235,12 +238,16 @@ int main(int argc, char** argv) pythonCallbackResults.data().exposeValue("numProcesses", performance.processes()); pythonCallbackResults.data().exposeValue("numThreads", performance.threads()); pythonCallbackResults.data().exposeValue("numCores", performance.cores()); + pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells()); + pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells()); pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time)); pythonCallbackResults.data().exposeValue("stencil", infoStencil); pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern); pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup); + pythonCallbackResults.data().exposeValue("vectorised", vectorised); + pythonCallbackResults.data().exposeValue("nontemporal", nontemporal); pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal); pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs); // Call Python function to report results diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index 5a600ead..723b28f1 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -3,17 +3,12 @@ import sympy as sp import pystencils as ps -from pystencils.simp.subexpression_insertion import insert_zeros, insert_aliases, insert_constants,\ - insert_symbol_times_minus_one - from lbmpy.advanced_streaming import is_inplace -from lbmpy.advanced_streaming.utility import streaming_patterns, get_accessor, Timestep +from lbmpy.advanced_streaming.utility import streaming_patterns from lbmpy.boundaries import NoSlip, UBB from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, LBStencil, create_lb_collision_rule from lbmpy.enums import Method, Stencil, SubgridScaleModel -from lbmpy.fieldaccess import CollideOnlyInplaceAccessor from lbmpy.moments import get_default_moment_set_for_stencil -from lbmpy.updatekernels import create_stream_only_kernel from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator @@ -21,48 +16,53 @@ omega = sp.symbols('omega') omega_free = sp.Symbol('omega_free') -# best configs in terms of FLOPS options_dict = { 'srt': { 'method': Method.SRT, 'relaxation_rate': omega, - 'compressible': True, + 'compressible': False, }, 'trt': { 'method': Method.TRT, 'relaxation_rate': omega, - 'compressible': True, + 'compressible': False, }, - 'r-w-mrt': { + 'mrt': { 'method': Method.MRT, 'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1], - 'compressible': True, + 'compressible': False, }, - 'w-mrt': { + 'mrt-overrelax': { 'method': Method.MRT, 'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)], - 'compressible': True, + 'compressible': False, }, - 'r-cm': { + 'central': { 'method': Method.CENTRAL_MOMENT, 'relaxation_rate': omega, 'compressible': True, }, - 'cm': { + 'central-overrelax': { 'method': Method.CENTRAL_MOMENT, 'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)], 'compressible': True, }, - 'r-k': { - 'method': Method.CUMULANT, + 'cumulant': { + 'method': Method.MONOMIAL_CUMULANT, 'relaxation_rate': omega, 'compressible': True, }, - 'k': { - 'method': Method.CUMULANT, + 'cumulant-overrelax': { + 'method': Method.MONOMIAL_CUMULANT, 'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)], 'compressible': True, }, + 'cumulant-K17': { + 'method': Method.CUMULANT, + 'relaxation_rate': omega, + 'compressible': True, + 'fourth_order_correction': 0.01 + }, 'entropic': { 'method': Method.TRT_KBC_N4, 'compressible': True, @@ -75,6 +75,11 @@ 'method': Method.SRT, 'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY, 'relaxation_rate': omega, + }, + 'qr': { + 'method': Method.SRT, + 'subgrid_scale_model': SubgridScaleModel.QR, + 'relaxation_rate': omega, } } @@ -83,6 +88,8 @@ const char * infoStencil = "{stencil}"; const char * infoStreamingPattern = "{streaming_pattern}"; const char * infoCollisionSetup = "{collision_setup}"; +const bool vectorised = {vec}; +const bool nontemporal = {nt_stores}; const bool infoCseGlobal = {cse_global}; const bool infoCsePdfs = {cse_pdfs}; """ @@ -90,10 +97,15 @@ with CodeGeneration() as ctx: openmp = True if ctx.openmp else False field_type = "float64" if ctx.double_accuracy else "float32" - if ctx.optimize_for_localhost: - cpu_vec = {"nontemporal": False, "assume_aligned": True} - else: - cpu_vec = None + # This base pointer specification causes introduces temporary pointers in the outer loop such that the inner loop + # only contains aligned memory addresses. Doing so NT Stores are much more effective which causes great perfomance + # gains especially for the pull scheme on skylake architectures + base_pointer_spec = None # [['spatialInner0'], ['spatialInner1']] + # cpu_vec = {"instruction_set": "best", "nontemporal": False, + # "assume_aligned": True, 'assume_sufficient_line_padding': True} + + cpu_vec = {"instruction_set": None} + nt_stores = False config_tokens = ctx.config.split('_') @@ -110,7 +122,6 @@ raise ValueError("Only D3Q27 and D3Q19 stencil are supported at the moment") assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}" - options = options_dict[collision_setup] assert stencil.D == 3, "This application supports only three-dimensional stencils" @@ -121,23 +132,20 @@ lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options) lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx') + # This creates a simplified version of the central moment collision operator where the bulk and shear viscosity is + # not seperated. This is done to get a fair comparison with the monomial cumulants. if lbm_config.method == Method.CENTRAL_MOMENT: lbm_config = replace(lbm_config, nested_moments=get_default_moment_set_for_stencil(stencil)) if not is_inplace(streaming_pattern): lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp) - field_swaps = [(pdfs, pdfs_tmp)] - else: - field_swaps = [] - # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM. - # is_inplace is set to False to ensure that the streaming is done with src and dst field. - # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense. - # accessor = CollideOnlyInplaceAccessor() - accessor = get_accessor(streaming_pattern, Timestep.EVEN) - #accessor.is_inplace = False - field_swaps_stream_only = () if accessor.is_inplace else [(pdfs, pdfs_tmp)] - stream_only_kernel = create_stream_only_kernel(stencil, pdfs, None if accessor.is_inplace else pdfs_tmp, accessor=accessor) + # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations from + # the compiler the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns compilers would + # typically remove the copy of the center PDF which results in an overestimation of the maximum performance + stream_only_kernel = [] + for i in range(stencil.Q): + stream_only_kernel.append(ps.Assignment(pdfs(i), pdfs((i + 3) % stencil.Q))) # LB Sweep collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) @@ -153,17 +161,25 @@ nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, cpu_openmp=openmp, cpu_vectorize_info=cpu_vec, - set_pre_collision_pdfs=False) + base_pointer_specification=base_pointer_spec) # Stream only kernel + cpu_vec_stream = None + if ctx.optimize_for_localhost: + cpu_vec_stream = {"instruction_set": "best", "nontemporal": True, + "assume_aligned": True, 'assume_sufficient_line_padding': True, + "assume_inner_stride_one": True} + generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel, - field_swaps=field_swaps_stream_only, - target=ps.Target.CPU, cpu_openmp=openmp) + target=ps.Target.CPU, cpu_openmp=openmp, + cpu_vectorize_info=cpu_vec_stream, base_pointer_specification=[['spatialInner0'], ['spatialInner1']]) infoHeaderParams = { 'stencil': stencil_str, 'streaming_pattern': streaming_pattern, 'collision_setup': collision_setup, + 'vec': int(True if cpu_vec else False), + 'nt_stores': int(nt_stores), 'cse_global': int(lbm_opt.cse_global), 'cse_pdfs': int(lbm_opt.cse_pdfs), } diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py index 21235056..3cfa7a91 100644 --- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py @@ -4,13 +4,26 @@ from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle import sys import sqlite3 -from math import prod + +try: + import machinestate as ms +except ImportError: + ms = None # Number of time steps run for a workload of 128^3 per process # if double as many cells are on the process, half as many time steps are run etc. # increase this to get more reliable measurements TIME_STEPS_FOR_128_BLOCK = 10 DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3") +BENCHMARK = int(os.environ.get('BENCHMARK', 0)) + +WeakX = int(os.environ.get('WeakX', 128)) +WeakY = int(os.environ.get('WeakY', 128)) +WeakZ = int(os.environ.get('WeakZ', 128)) + +StrongX = int(os.environ.get('StrongX', 128)) +StrongY = int(os.environ.get('StrongY', 128)) +StrongZ = int(os.environ.get('StrongZ', 128)) def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK): @@ -35,7 +48,7 @@ class Scenario: def __init__(self, cells_per_block=(128, 128, 128), periodic=(1, 1, 1), blocks_per_process=1, timesteps=None, time_step_strategy="normal", omega=1.8, inner_outer_split=(1, 1, 1), warmup_steps=2, outer_iterations=3, init_shear_flow=False, boundary_setup=False, - vtk_write_frequency=0, remaining_time_logger_frequency=-1): + vtk_write_frequency=0, remaining_time_logger_frequency=-1, db_file_name=None): if boundary_setup: init_shear_flow = False @@ -58,6 +71,7 @@ def __init__(self, cells_per_block=(128, 128, 128), periodic=(1, 1, 1), blocks_p self.vtk_write_frequency = vtk_write_frequency self.remaining_time_logger_frequency = remaining_time_logger_frequency + self.db_file_name = DB_FILE if db_file_name is None else db_file_name self.config_dict = self.config(print_dict=False) @@ -101,6 +115,15 @@ def results_callback(self, **kwargs): data['compile_flags'] = wlb.build_info.compiler_flags data['walberla_version'] = wlb.build_info.version data['build_machine'] = wlb.build_info.build_machine + + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + sequenceValuesToScalars(data) result = data @@ -111,8 +134,8 @@ def results_callback(self, **kwargs): table_name = table_name.replace("-", "_") for num_try in range(num_tries): try: - checkAndUpdateSchema(result, table_name, DB_FILE) - storeSingle(result, table_name, DB_FILE) + checkAndUpdateSchema(result, table_name, self.db_file_name) + storeSingle(result, table_name, self.db_file_name) break except sqlite3.OperationalError as e: wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}") @@ -156,18 +179,38 @@ def overlap_benchmark(): scenarios.add(scenario) -def scaling_benchmark(): - """Tests different communication overlapping strategies""" - wlb.log_info_on_root("Running scaling benchmark") +def weak_scaling_benchmark(): + wlb.log_info_on_root("Running weak scaling benchmark with one block per proc") wlb.log_info_on_root("") scenarios = wlb.ScenarioManager() - cells_per_block = [(32, 32, 32), (128, 128, 128)] - for cell_per_block in cells_per_block: - scenarios.add(Scenario(time_step_strategy='noOverlap', + for t in ["noOverlap", "simpleOverlap"]: + scenarios.add(Scenario(time_step_strategy=t, inner_outer_split=(1, 1, 1), - cells_per_block=cell_per_block)) + cells_per_block=(WeakX, WeakY, WeakZ), + boundary_setup=True, + outer_iterations=1, + db_file_name="weakScalingUniformGridOneBlock.sqlite3")) + + +def strong_scaling_benchmark(): + wlb.log_info_on_root("Running strong scaling benchmark with one block per proc") + wlb.log_info_on_root("") + + scenarios = wlb.ScenarioManager() + + domain_size = (StrongX, StrongY, StrongZ) + blocks = block_decomposition(wlb.mpi.numProcesses()) + cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))]) + + for t in ["noOverlap", "simpleOverlap"]: + scenarios.add(Scenario(cells_per_block=cells_per_block, + time_step_strategy=t, + outer_iterations=1, + timesteps=10, + boundary_setup=True, + db_file_name="strongScalingUniformGridOneBlock.sqlite3")) def single_node_benchmark(): @@ -176,13 +219,11 @@ def single_node_benchmark(): wlb.log_info_on_root("") scenarios = wlb.ScenarioManager() - block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)] - for block_size in block_sizes: - scenario = Scenario(cells_per_block=block_size, - time_step_strategy='kernelOnly', - outer_iterations=1, - timesteps=num_time_steps(block_size)) - scenarios.add(scenario) + scenario = Scenario(cells_per_block=(128, 128, 128), + time_step_strategy='kernelOnly', + outer_iterations=1, + timesteps=10) + scenarios.add(scenario) def validation_run(): @@ -211,5 +252,15 @@ def validation_run(): # performance of compute kernel (no communication) # overlap_benchmark() # benchmarks different communication overlap options # profiling() # run only two timesteps on a smaller domain for profiling only -validation_run() +# validation_run() # scaling_benchmark() + + +if BENCHMARK == 0: + single_node_benchmark() +elif BENCHMARK == 1: + weak_scaling_benchmark() +elif BENCHMARK == 2: + strong_scaling_benchmark() +else: + validation_run() diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt index 2607004f..25ca68ed 100644 --- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt +++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt @@ -3,13 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" ) waLBerla_link_files_to_builddir( "simulation_setup" ) -foreach(streaming_pattern pull push aa esotwist) +foreach(streaming_pattern pull push aa esotwist esopull esopush) foreach(stencil d3q19 d3q27) - foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax entropic smagorinsky) + foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr) # KBC methods only for D2Q9 and D3Q27 defined if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") continue() - endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") + endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19") + if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19") + continue() + endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19") set(config ${stencil}_${streaming_pattern}_${collision_setup}) waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config} FILE UniformGridGPU.py diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp index 91b7a021..65e7b903 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp @@ -73,6 +73,8 @@ using SweepCollection_T = lbm::UniformGridGPUSweepCollection; using gpu::communication::UniformGPUScheme; +using macroFieldType = VelocityField_T::value_type; + int main(int argc, char** argv) { mpi::Environment const env(argc, argv); @@ -103,9 +105,9 @@ int main(int argc, char** argv) const StorageSpecification_T StorageSpec = StorageSpecification_T(); const BlockDataID pdfFieldCpuID = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx); - auto allocator = make_shared< gpu::HostFieldAllocator >(); // use pinned memory allocator for faster CPU-GPU memory transfers - const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(1), allocator); - const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(1), allocator); + auto allocator = make_shared< gpu::HostFieldAllocator >(); // use pinned memory allocator for faster CPU-GPU memory transfers + const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(1), allocator); + const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(1), allocator); const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field"); // Initialize velocity on cpu @@ -136,7 +138,7 @@ int main(int argc, char** argv) ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// LB SWEEPS AND BOUNDARY HANDLING /// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); + const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID); // Boundaries const FlagUID fluidFlagUID("Fluid"); @@ -211,7 +213,7 @@ int main(int argc, char** argv) vtkOutput->addBeforeFunction([&]() { for (auto& block : *blocks) sweepCollection.calculateMacroscopicParameters(&block); - gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID); + gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, velFieldCpuID, velFieldGpuID); }); timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); } @@ -264,6 +266,13 @@ int main(int argc, char** argv) python_coupling::PythonCallback pythonCallbackResults("results_callback"); if (pythonCallbackResults.isCallable()) { + pythonCallbackResults.data().exposeValue("numProcesses", performance.processes()); + pythonCallbackResults.data().exposeValue("numThreads", performance.threads()); + pythonCallbackResults.data().exposeValue("numCores", performance.cores()); + pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells()); + pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells()); + pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time)); + pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time)); pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time)); pythonCallbackResults.data().exposeValue("stencil", infoStencil); pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern); diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py index 09235c43..10c562b3 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py @@ -4,18 +4,16 @@ from dataclasses import replace +from pystencils import Assignment from pystencils.typing import TypedSymbol from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions -from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil -from lbmpy.enums import SubgridScaleModel +from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil, SubgridScaleModel from lbmpy.advanced_streaming import is_inplace from lbmpy.advanced_streaming.utility import streaming_patterns from lbmpy.boundaries import NoSlip, UBB from lbmpy.creationfunctions import create_lb_collision_rule from lbmpy.moments import get_default_moment_set_for_stencil -from lbmpy.updatekernels import create_stream_only_kernel -from lbmpy.fieldaccess import * from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator @@ -75,6 +73,12 @@ 'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)], 'compressible': True, }, + 'cumulant-K17': { + 'method': Method.CUMULANT, + 'relaxation_rate': omega, + 'compressible': True, + 'fourth_order_correction': 0.01 + }, 'entropic': { 'method': Method.TRT_KBC_N4, 'compressible': True, @@ -87,6 +91,11 @@ 'method': Method.SRT, 'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY, 'relaxation_rate': omega, + }, + 'qr': { + 'method': Method.SRT, + 'subgrid_scale_model': SubgridScaleModel.QR, + 'relaxation_rate': omega, } } @@ -102,7 +111,8 @@ optimize = True with CodeGeneration() as ctx: - field_type = "float64" if ctx.double_accuracy else "float32" + pdf_data_type = "float64" + field_data_type = "float64" config_tokens = ctx.config.split('_') assert len(config_tokens) >= 3 @@ -125,8 +135,8 @@ options = options_dict[collision_setup] assert stencil.D == 3, "This application supports only three-dimensional stencils" - pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx') - density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx') + pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {pdf_data_type}[3D]", layout='fzyx') + density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_data_type}[3D]", layout='fzyx') macroscopic_fields = {'density': density_field, 'velocity': velocity_field} lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options) @@ -141,13 +151,12 @@ else: field_swaps = [] - # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM. - # is_inplace is set to False to ensure that the streaming is done with src and dst field. - # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense. - accessor = CollideOnlyInplaceAccessor() - accessor.is_inplace = False - field_swaps_stream_only = [(pdfs, pdfs_tmp)] - stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor) + # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations from + # the compiler the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns compilers would + # typically remove the copy of the center PDF which results in an overestimation of the maximum performance + stream_only_kernel = [] + for i in range(stencil.Q): + stream_only_kernel.append(Assignment(pdfs(i), pdfs((i + 3) % stencil.Q))) # LB Sweep collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) @@ -159,9 +168,10 @@ lb_method = collision_rule.method no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip', - boundary_object=NoSlip()) + boundary_object=NoSlip(), field_data_type=pdf_data_type) ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB', - boundary_object=UBB([0.05, 0, 0], data_type=field_type)) + boundary_object=UBB([0.05, 0, 0], data_type=field_data_type), + field_data_type=pdf_data_type) generate_lbm_package(ctx, name="UniformGridGPU", collision_rule=collision_rule, @@ -169,12 +179,12 @@ nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params, - max_threads=max_threads, set_pre_collision_pdfs=False) + data_type=field_data_type, pdfs_data_type=pdf_data_type, + max_threads=max_threads) # Stream only kernel - vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')] - generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only, - gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU, + generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, + gpu_indexing_params={'block_size': (128, 1, 1)}, target=ps.Target.GPU, max_threads=max_threads) infoHeaderParams = { diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py b/apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py similarity index 100% rename from apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py rename to apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py index 74be4378..346a596e 100755 --- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py @@ -6,11 +6,25 @@ import sqlite3 from math import prod +try: + import machinestate as ms +except ImportError: + ms = None + # Number of time steps run for a workload of 128^3 per GPU # if double as many cells are on the GPU, half as many time steps are run etc. # increase this to get more reliable measurements TIME_STEPS_FOR_128_BLOCK = 1000 DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3") +BENCHMARK = int(os.environ.get('BENCHMARK', 0)) + +WeakX = int(os.environ.get('WeakX', 128)) +WeakY = int(os.environ.get('WeakY', 128)) +WeakZ = int(os.environ.get('WeakZ', 128)) + +StrongX = int(os.environ.get('StrongX', 128)) +StrongY = int(os.environ.get('StrongY', 128)) +StrongZ = int(os.environ.get('StrongZ', 128)) BASE_CONFIG = { 'DomainSetup': { @@ -39,6 +53,8 @@ def num_time_steps(block_size, time_steps_for_128_block=200): cells = block_size[0] * block_size[1] * block_size[2] time_steps = (128 ** 3 / cells) * time_steps_for_128_block + if time_steps < 10: + time_steps = 10 return int(time_steps) @@ -61,13 +77,13 @@ def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blo inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3, init_shear_flow=False, boundary_setup=False, vtk_write_frequency=0, remaining_time_logger_frequency=-1, - additional_info=None): + additional_info=None, blocks=None, db_file_name=None): if boundary_setup: init_shear_flow = False periodic = (0, 0, 0) - self.blocks = block_decomposition(wlb.mpi.numProcesses()) + self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses()) self.cells_per_block = cells_per_block self.periodic = periodic @@ -85,6 +101,7 @@ def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blo self.vtk_write_frequency = vtk_write_frequency self.remaining_time_logger_frequency = remaining_time_logger_frequency + self.db_file_name = DB_FILE if db_file_name is None else db_file_name self.config_dict = self.config(print_dict=False) self.additional_info = additional_info @@ -97,7 +114,6 @@ def config(self, print_dict=True): 'blocks': self.blocks, 'cellsPerBlock': self.cells_per_block, 'periodic': self.periodic, - 'oneBlockPerProcess': True }, 'Parameters': { 'omega': self.omega, @@ -115,7 +131,6 @@ def config(self, print_dict=True): 'Logging': { 'logLevel': 'info', # info progress detail tracing } - } if self.boundary_setup: config_dict["Boundaries"] = ldc_setup @@ -140,6 +155,15 @@ def results_callback(self, **kwargs): data['compile_flags'] = wlb.build_info.compiler_flags data['walberla_version'] = wlb.build_info.version data['build_machine'] = wlb.build_info.build_machine + + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + sequenceValuesToScalars(data) result = data @@ -150,8 +174,8 @@ def results_callback(self, **kwargs): table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error for num_try in range(num_tries): try: - checkAndUpdateSchema(result, table_name, DB_FILE) - storeSingle(result, table_name, DB_FILE) + checkAndUpdateSchema(result, table_name, self.db_file_name) + storeSingle(result, table_name, self.db_file_name) break except sqlite3.OperationalError as e: wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}") @@ -200,12 +224,70 @@ def overlap_benchmark(): scenarios.add(scenario) +def no_overlap_scaling(cuda_enabled_mpi=False): + """Tests different communication overlapping strategies""" + wlb.log_info_on_root("Running scaling benchmark without communication hiding") + wlb.log_info_on_root("") + + scenarios = wlb.ScenarioManager() + # no overlap + scenarios.add(Scenario(cells_per_block=(256, 256, 256), + cuda_blocks=(128, 1, 1), + time_step_strategy='noOverlap', + inner_outer_split=(1, 1, 1), + cuda_enabled_mpi=cuda_enabled_mpi, + outer_iterations=1)) + + +def weak_scaling_overlap(cuda_enabled_mpi=False): + """Tests different communication overlapping strategies""" + wlb.log_info_on_root("Running scaling benchmark with communication hiding") + wlb.log_info_on_root("") + + scenarios = wlb.ScenarioManager() + + # overlap + for t in ["noOverlap", "simpleOverlap"]: + scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ), + cuda_blocks=(128, 1, 1), + time_step_strategy=t, + inner_outer_split=(8, 8, 8), + cuda_enabled_mpi=cuda_enabled_mpi, + outer_iterations=1, + boundary_setup=True, + db_file_name="weakScalingUniformGrid.sqlite3")) + + +def strong_scaling_overlap(cuda_enabled_mpi=False): + wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding") + wlb.log_info_on_root("") + + scenarios = wlb.ScenarioManager() + + domain_size = (StrongX, StrongY, StrongZ) + blocks = block_decomposition(wlb.mpi.numProcesses()) + cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))]) + + # overlap + for t in ["noOverlap", "simpleOverlap"]: + scenarios.add(Scenario(cells_per_block=cells_per_block, + cuda_blocks=(128, 1, 1), + time_step_strategy=t, + inner_outer_split=(1, 1, 1), + cuda_enabled_mpi=cuda_enabled_mpi, + outer_iterations=1, + timesteps=50, + blocks=blocks, + boundary_setup=True, + db_file_name="strongScalingUniformGridOneBlock.sqlite3")) + + def single_gpu_benchmark(): """Benchmarks only the LBM compute kernel""" wlb.log_info_on_root("Running single GPU benchmarks") wlb.log_info_on_root("") - gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8)) + gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40)) gpu_mem = gpu_mem_gb * (2 ** 30) gpu_type = os.environ.get('GPU_TYPE') @@ -214,12 +296,8 @@ def single_gpu_benchmark(): additional_info['gpu_type'] = gpu_type scenarios = wlb.ScenarioManager() - block_sizes = [(i, i, i) for i in (32, 64, 128, 256)] - cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1), - (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1), - (32, 4, 1), (64, 4, 1), (128, 4, 1), - (32, 8, 1), (64, 8, 1), - (32, 16, 1)] + block_sizes = [(i, i, i) for i in (128, 256, 320)] + cuda_blocks = [(128, 1, 1), ] for block_size in block_sizes: for cuda_block_size in cuda_blocks: # cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1) @@ -266,4 +344,14 @@ def validation_run(): # performance of compute kernel (no communication) # overlap_benchmark() # benchmarks different communication overlap options # profiling() # run only two timesteps on a smaller domain for profiling only -validation_run() +# validation_run() + +if BENCHMARK == 0: + single_gpu_benchmark() +elif BENCHMARK == 1: + weak_scaling_overlap(True) +elif BENCHMARK == 2: + strong_scaling_overlap(True) +else: + validation_run() + diff --git a/apps/showcases/Thermocapillary/benchmark.py b/apps/showcases/Thermocapillary/benchmark.py index 6493eda5..a08e2688 100755 --- a/apps/showcases/Thermocapillary/benchmark.py +++ b/apps/showcases/Thermocapillary/benchmark.py @@ -6,6 +6,11 @@ from waLBerla.tools.config import block_decomposition from waLBerla.tools.sqlitedb import sequenceValuesToScalars +try: + import machinestate as ms +except ImportError: + ms = None + def num_time_steps(block_size, time_steps_for_256_block=50): # Number of time steps run for a workload of 256^3 cells per process @@ -137,6 +142,14 @@ def write_benchmark_results(self, **kwargs): data['walberla_version'] = wlb.build_info.version data['build_machine'] = wlb.build_info.build_machine + if ms: + state = ms.MachineState(extended=False, anonymous=True) + state.generate() # generate subclasses + state.update() # read information + data["MachineState"] = str(state.get()) + else: + print("MachineState module is not available. MachineState was not saved") + sequenceValuesToScalars(data) csv_file = f"thermocapillary_benchmark.csv" diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py index 8a872803..862a6cda 100644 --- a/python/lbmpy_walberla/packing_kernels.py +++ b/python/lbmpy_walberla/packing_kernels.py @@ -7,8 +7,10 @@ from jinja2 import Environment, PackageLoader, StrictUndefined from pystencils import Assignment, CreateKernelConfig, create_kernel, Field, FieldType, fields, Target +from pystencils.astnodes import LoopOverCoordinate +from pystencils.integer_functions import int_div from pystencils.stencil import offset_to_direction_string -from pystencils.typing import TypedSymbol +from pystencils.typing import TypedSymbol, BasicType, PointerType, FieldPointerSymbol from pystencils.stencil import inverse_direction from pystencils.bit_masks import flag_cond @@ -18,7 +20,7 @@ from lbmpy.stencils import LBStencil from pystencils_walberla.cmake_integration import CodeGenerationContext -from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode +from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode, AbortNode from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env from pystencils_walberla.utility import config_from_context @@ -101,6 +103,23 @@ def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelC self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)] self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout) + self.block_wise = True + if not self.inplace or not self.config.target == Target.GPU: + self.block_wise = False + + self.index = TypedSymbol("index", dtype=BasicType(np.int64)) + self.index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64)) + self.src_ptr_type = PointerType(self.src_field.dtype, const=True, restrict=True, double_pointer=True) + self.src_ptr = FieldPointerSymbol(self.src_field.name, self.src_field.dtype, const=True) + self.dst_ptr_type = PointerType(self.dst_field.dtype, const=False, restrict=True, double_pointer=True) + self.dst_ptr = FieldPointerSymbol(self.dst_field.name, self.dst_field.dtype, const=False) + + self.data_src = TypedSymbol(f"_data_{self.src_field.name}_dp", dtype=self.src_ptr_type) + self.data_dst = TypedSymbol(f"_data_{self.dst_field.name}_dp", dtype=self.dst_ptr_type) + + self.f = sp.IndexedBase(self.data_src, shape=self.index_shape) + self.d = sp.IndexedBase(self.data_dst, shape=self.index_shape) + def create_uniform_kernel_families(self, kernels_dict=None): kernels = dict() if kernels_dict is None else kernels_dict @@ -115,6 +134,8 @@ def create_uniform_kernel_families(self, kernels_dict=None): def create_nonuniform_kernel_families(self, kernels_dict=None): kernels = dict() if kernels_dict is None else kernels_dict + kernels['localCopyRedistribute'] = self.get_local_copy_redistribute_kernel_family() + kernels['localPartialCoalescence'] = self.get_local_copy_partial_coalescence_kernel_family() kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family() kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family() kernels['zeroCoalescenceRegion'] = self.get_zero_coalescence_region_kernel_family() @@ -231,7 +252,10 @@ def get_local_copy_direction_ast(self, comm_dir, timestep): dir_string = offset_to_direction_string(comm_dir) streaming_dirs = self.get_streaming_dirs(comm_dir) src, dst = self._stream_out_accs(timestep) - assignments = [] + assignments = list() + if self.block_wise: + assignments.append(Assignment(self.src_ptr, self.f[self.index])) + assignments.append(Assignment(self.dst_ptr, self.d[self.index])) dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) if len(dir_indices) == 0: return None @@ -283,15 +307,59 @@ def get_unpack_redistribute_ast(self, comm_dir, timestep): return create_kernel(assignments, config=config) def get_unpack_redistribute_kernel_family(self): - return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast) + return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast, + exclude_time_step=Timestep.EVEN) def get_local_copy_redistribute_ast(self, comm_dir, timestep): - # TODO - raise NotImplementedError() + assert not all(d == 0 for d in comm_dir) + ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)] + + dir_string = offset_to_direction_string(comm_dir) + streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir)) + dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + if len(dir_indices) == 0: + return None + + # for inplace streaming the dst (fine grid) must always be on odd state + dst_timestep = Timestep.ODD if self.inplace else Timestep.BOTH + + _, dst = self._stream_out_accs(dst_timestep) + src, _ = self._stream_out_accs(timestep) + + src_abs = self.src_field.new_field_with_different_name(self.src_field.name) + src_abs.field_type = FieldType.CUSTOM + + orthos = self.orthogonal_principals(comm_dir) + sub_dirs = self.contained_principals(comm_dir) + orthogonal_combinations = self.linear_combinations(orthos) + subdir_combinations = self.linear_combinations_nozero(sub_dirs) + second_gl_dirs = [o + s for o, s in product(orthogonal_combinations, subdir_combinations)] + negative_dir_correction = np.array([(1 if d == -1 else 0) for d in comm_dir]) + assignments = [] + for offset in orthogonal_combinations: + o = offset + negative_dir_correction + for d in range(self.values_per_cell): + field_acc = dst[d].get_shifted(*o) + src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)] + assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, )))) + + for offset in second_gl_dirs: + o = offset + negative_dir_correction + for d in dir_indices: + field_acc = dst[d].get_shifted(*o) + src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)] + assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, )))) + + function_name = f'localCopyRedistribute_{dir_string}' + timestep_suffix(timestep) + iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim)) + config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice, + data_type=self.data_type, ghost_layers=0, allow_double_writes=True, + cpu_openmp=self.config.cpu_openmp, target=self.config.target) + + return create_kernel(assignments, config=config) def get_local_copy_redistribute_kernel_family(self): - # TODO - raise NotImplementedError() + return self._construct_directionwise_kernel_family(self.get_local_copy_redistribute_ast) # --------------------------- Pack / Unpack / LocalCopy Fine to Coarse --------------------------------------------- @@ -322,7 +390,8 @@ def get_pack_partial_coalescence_ast(self, comm_dir, timestep): return ast def get_pack_partial_coalescence_kernel_family(self): - return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast) + return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast, + exclude_time_step=Timestep.ODD) def get_unpack_coalescence_ast(self, comm_dir, timestep): config = replace(self.config, ghost_layers=0) @@ -370,12 +439,53 @@ def get_zero_coalescence_region_ast(self, comm_dir, timestep): def get_zero_coalescence_region_kernel_family(self): return self._construct_directionwise_kernel_family(self.get_zero_coalescence_region_ast) - # TODO def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep): - raise NotImplementedError() + assert not all(d == 0 for d in comm_dir) + ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)] + + dir_string = offset_to_direction_string(comm_dir) + streaming_dirs = self.get_streaming_dirs(comm_dir) + dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs) + + if len(dir_indices) == 0: + return None + buffer = sp.symbols(f"b_:{self.values_per_cell}") + + # for inplace streaming the src (fine grid) must always be on even state + src_timestep = Timestep.ODD if self.inplace else Timestep.BOTH + + src, _ = self._stream_in_accs(src_timestep) + _, dst = self._stream_in_accs(timestep.next()) + mask = self.mask_field + + dst_abs = self.dst_field.new_field_with_different_name(self.dst_field.name) + dst_abs.field_type = FieldType.CUSTOM + + coalescence_factor = sp.Rational(1, 2 ** self.dim) + + offsets = list(product(*((0, 1) for _ in comm_dir))) + assignments = [] + for i, d in enumerate(dir_indices): + acc = 0 + for o in offsets: + acc += flag_cond(d, mask[o], src[d].get_shifted(*o)) + assignments.append(Assignment(buffer[i], acc)) + + for i, d in enumerate(dir_indices): + index = dst[d].index + dst_access = [int_div(ctr[i], 2) + o for i, o in enumerate(dst[d].offsets)] + assignments.append(Assignment(dst_abs.absolute_access(dst_access, index), + dst_abs.absolute_access(dst_access, index) + coalescence_factor * buffer[i])) + + iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim)) + config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0) + + ast = create_kernel(assignments, config=config) + ast.function_name = f'localPartialCoalescence_{dir_string}' + timestep_suffix(timestep) + return ast def get_local_copy_partial_coalescence_kernel_family(self): - raise NotImplementedError() + return self._construct_directionwise_kernel_family(self.get_local_copy_partial_coalescence_ast) # ------------------------------------------ Utility --------------------------------------------------------------- @@ -425,7 +535,7 @@ def linear_combinations_nozero(self, vectors): # --------------------------- Private Members ---------------------------------------------------------------------- - def _construct_directionwise_kernel_family(self, create_ast_callback): + def _construct_directionwise_kernel_family(self, create_ast_callback, exclude_time_step=None): subtrees = [] direction_symbol = TypedSymbol('dir', dtype='stencil::Direction') for t in get_timesteps(self.streaming_pattern): @@ -439,7 +549,10 @@ def _construct_directionwise_kernel_family(self, create_ast_callback): continue kernel_call = KernelCallNode(ast) cases_dict[f"stencil::{dir_string}"] = kernel_call - subtrees.append(SwitchNode(direction_symbol, cases_dict)) + if exclude_time_step is not None and t == exclude_time_step: + subtrees.append(AbortNode("This function can not be called! Please contact the waLBerla team")) + else: + subtrees.append(SwitchNode(direction_symbol, cases_dict)) if not self.inplace: tree = subtrees[0] diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py index 60fd96d2..ae56ff73 100644 --- a/python/lbmpy_walberla/storage_specification.py +++ b/python/lbmpy_walberla/storage_specification.py @@ -113,8 +113,8 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext 'kernels': kernels, 'direction_sizes': cg.get_direction_sizes(), 'src_field': cg.src_field, - 'dst_field': cg.dst_field - + 'dst_field': cg.dst_field, + 'block_wise': cg.block_wise } if nonuniform: jinja_context['mask_field'] = cg.mask_field diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py index bc8bdda4..164dd94d 100644 --- a/python/lbmpy_walberla/sweep_collection.py +++ b/python/lbmpy_walberla/sweep_collection.py @@ -1,13 +1,17 @@ from dataclasses import replace from typing import Dict +from jinja2 import Environment, PackageLoader, StrictUndefined + import sympy as sp import numpy as np -from pystencils import Target, create_kernel +from pystencils import Target, create_kernel, Assignment +from pystencils.bit_masks import flag_cond from pystencils.config import CreateKernelConfig -from pystencils.field import Field +from pystencils.field import Field, fields from pystencils.simp import add_subexpressions_for_field_reads +from pystencils.typing import BasicType, PointerType, FieldPointerSymbol, TypedSymbol, CastFunc from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation @@ -17,8 +21,8 @@ from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily from pystencils_walberla.utility import config_from_context -from pystencils_walberla import generate_sweep_collection -from lbmpy_walberla.utility import create_pdf_field +from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env +from lbmpy_walberla.utility import create_pdf_field, timestep_suffix from .alternating_sweeps import EvenIntegerCondition from .function_generator import kernel_family_function_generator @@ -59,7 +63,6 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli field_layout=lbm_optimisation.field_layout) config = replace(config, ghost_layers=0) - function_generators = [] def family(name): @@ -68,6 +71,13 @@ def family(name): def generator(name, kernel_family): return kernel_family_function_generator(name, kernel_family, namespace='lbm', max_threads=max_threads) + all_fields = collision_rule.bound_fields.union(collision_rule.free_fields) + all_fields.update({src_field, dst_field}) + all_fields = list(sorted(all_fields, key=lambda e: str(e))) + + bw_stream_collide = block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config) + bw_stream = block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config) + function_generators.append(generator('streamCollide', family("streamCollide"))) function_generators.append(generator('collide', family("collide"))) function_generators.append(generator('stream', family("stream"))) @@ -87,7 +97,65 @@ def generator(name, kernel_family): namespace='lbm', max_threads=max_threads) function_generators.append(getter_generator) - generate_sweep_collection(ctx, class_name, function_generators, refinement_scaling) + contexts_function_generators = list() + for fct in function_generators: + contexts_function_generators.append(fct()) + + namespaces = set([context['namespace'] for context in contexts_function_generators]) + assert len(namespaces) == 1, "All function_generators must output the same namespace!" + namespace = namespaces.pop() + + headers = set() + for context in contexts_function_generators: + for header in context['interface_spec'].headers: + headers.add(header) + for header in context['kernel'].get_headers(): + headers.add(header) + + kernel_list = list() + for context in contexts_function_generators: + kernel_list.append(context['kernel']) + + kernels = list() + for context in contexts_function_generators: + kernels.append({ + 'kernel': context['kernel'], + 'function_name': context['function_name'], + 'ghost_layers_to_include': 'ghost_layers', + 'field': context['field'], + 'max_threads': context['max_threads'] + }) + + target = kernels[0]['kernel'].target + + jinja_context = { + 'block_stream_collide': bw_stream_collide, + 'block_stream': bw_stream, + 'all_fields': all_fields, + 'pdf_field': src_field, + 'kernel_list': kernel_list, + 'kernels': kernels, + 'namespace': namespace, + 'class_name': class_name, + 'headers': headers, + 'target': target.name.lower(), + 'is_gpu': target == Target.GPU, + 'parameter_scaling': refinement_scaling, + 'stencil_name': lbm_config.stencil.name, + 'D': lbm_config.stencil.D, + 'Q': lbm_config.stencil.Q, + 'inplace': is_inplace(lbm_config.streaming_pattern) + } + + env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + + header = env.get_template("LBMSweepCollection.tmpl.h").render(**jinja_context) + source = env.get_template("LBMSweepCollection.tmpl.cpp").render(**jinja_context) + + source_extension = "cu" if target == Target.GPU and ctx.cuda else "cpp" + ctx.write_file(f"{class_name}.h", header) + ctx.write_file(f"{class_name}.{source_extension}", source) class RefinementScaling: @@ -175,7 +243,7 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi default_dtype = config.data_type.default_factory() - get_timestep = {"field_name": pdfs.name, "function": "getTimestep"} + get_timestep = {"field_name": pdfs.name, "function": "getTimestepPlusOne"} temporary_fields = () field_swaps = () @@ -252,3 +320,104 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps) return family + + +def block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config): + + if not is_inplace(lbm_config.streaming_pattern): + return None + else: + ast_even, all_fields = create_block_wise_ast(collision_rule, src_field, dst_field, + lbm_config, Timestep.EVEN, config, False) + even_call = KernelCallNode(ast_even) + ast_odd, _ = create_block_wise_ast(collision_rule, src_field, dst_field, + lbm_config, Timestep.ODD, config, False) + odd_call = KernelCallNode(ast_odd) + tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8) + + family = KernelFamily(tree, class_name) + + indexed_to_field_name = dict() + for field in all_fields: + indexed_to_field_name[field.name] = f"_data_{field.name}_dp" + + context = { + 'kernel': family, + 'all_fields': all_fields, + 'namespace': 'lbm', + 'function_name': 'blockStreamCollide', + 'indexed_to_field_name': indexed_to_field_name, + 'max_threads': None + } + + return context + + +def block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config): + + if not is_inplace(lbm_config.streaming_pattern): + return None + else: + ast_even, all_fields = create_block_wise_ast(collision_rule, src_field, dst_field, + lbm_config, Timestep.EVEN, config, True) + even_call = KernelCallNode(ast_even) + ast_odd, _ = create_block_wise_ast(collision_rule, src_field, dst_field, + lbm_config, Timestep.ODD, config, True) + odd_call = KernelCallNode(ast_odd) + tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8) + + family = KernelFamily(tree, class_name) + + indexed_to_field_name = dict() + for field in all_fields: + indexed_to_field_name[field.name] = f"_data_{field.name}_dp" + + context = { + 'kernel': family, + 'all_fields': all_fields, + 'namespace': 'lbm', + 'function_name': 'blockStream', + 'indexed_to_field_name': indexed_to_field_name, + 'max_threads': None + } + + return context + + +def create_block_wise_ast(collision_rule, src_field, dst_field, lbm_config, timestep, config, stream_only): + stencil = lbm_config.stencil + streaming_pattern = lbm_config.streaming_pattern + default_dtype = config.data_type.default_factory() + config = replace(config, gpu_indexing_params={}) + + accessor = get_accessor(streaming_pattern, timestep) + + if stream_only: + update_rule = create_stream_only_kernel(stencil, src_field, dst_field, accessor) + else: + update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, accessor, data_type=default_dtype) + + bound_fields = update_rule.bound_fields + free_fields = update_rule.free_fields + + all_fields = list(bound_fields.union(free_fields)) + all_fields.sort(key=lambda field: field.name) + + index = TypedSymbol("index", dtype=BasicType(np.int64)) + index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64)) + + ass = list() + for field in all_fields: + const = True if field in free_fields else False + ptr_type = PointerType(field.dtype, const=const, restrict=True, double_pointer=True) + ptr = FieldPointerSymbol(field.name, field.dtype, const=const) + f = sp.IndexedBase(TypedSymbol(f"_data_{field.name}_dp", dtype=ptr_type), shape=index_shape) + ass.append(Assignment(ptr, f[index])) + + update_rule = ass + update_rule.all_assignments + + ast = create_kernel(update_rule, config=config) + base_name = "kernel_BlockStream" if stream_only else "kernel_BlockStreamCollide" + ast.function_name = base_name + timestep_suffix(timestep) + ast.assumed_inner_stride_one = config.cpu_vectorize_info['assume_inner_stride_one'] + return ast, all_fields diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h index 453f78e0..4e19d069 100644 --- a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h +++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h @@ -41,12 +41,12 @@ class {{class_name}} enum Type { ALL = 0, INNER = 1, OUTER = 2 }; - {{class_name}}( {{- ["const shared_ptr & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} ) + {{class_name}}( {{- ["const shared_ptr & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} ) : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_) { {% for object_name, boundary_class, kernel, additional_data_handler in zip(object_names, boundary_classes, kernel_list, additional_data_handlers) -%} - {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}}); + {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}}); {% endfor %} {% for object_name, flag_uid in zip(object_names, flag_uids) -%} diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp new file mode 100644 index 00000000..a691af5d --- /dev/null +++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp @@ -0,0 +1,156 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file {{class_name}}.cpp +//! \\author pystencils +//====================================================================================================================== +#include "{{class_name}}.h" + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-variable" +# pragma GCC diagnostic ignored "-Wignored-qualifiers" +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL ) +#pragma warning push +#pragma warning( disable : 1599 ) +#endif + +#ifdef __CUDACC__ +#pragma push +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diag_suppress 191 +#else +#pragma diag_suppress 191 +#endif +#endif + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + +{%if block_stream_collide -%} +{{block_stream_collide['kernel']|generate_definitions(target, block_stream_collide['max_threads'])}} +{{block_stream['kernel']|generate_definitions(target, block_stream['max_threads'])}} +{%endif%} + +{% for kernel in kernels %} +{{kernel['kernel']|generate_definitions(target, kernel['max_threads'])}} +{% endfor %} + +void {{class_name}}::blockStreamCollide({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}}) +{ + {%if block_stream_collide -%} + + {%if target is equalto 'gpu' -%} + dim3 _grid = grid_[level]; + dim3 _block = block_[level]; + + {%- for field in block_stream_collide['all_fields'] %} + {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}PointersGPU[level]; + {%- endfor %} + + {% else %} + + {%- for field in block_stream_collide['all_fields'] %} + {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}Pointers[level].data(); + {%- endfor %} + + {%- endif %} + const int64_t _size_0 = size_0[level]; + int64_t _size_{{block_stream_collide['all_fields'][0].name}}_0 = size_1; + int64_t _size_{{block_stream_collide['all_fields'][0].name}}_1 = size_2; + int64_t _size_{{block_stream_collide['all_fields'][0].name}}_2 = size_3; + + {{block_stream_collide['kernel']|generate_field_strides()|indent(3)}} + {{block_stream_collide['kernel']|generate_refs_for_kernel_parameters(prefix="this->", parameters_to_ignore=["_size_0"], ignore_fields=True, parameter_registration=parameter_scaling, level_known=True)|indent(3)}} + {{block_stream_collide['kernel']|generate_call(stream='stream', plain_kernel_call=True)|indent(3)}} + + {%endif%} +} + +void {{class_name}}::ghostLayerPropagation({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}}) +{ + {%if block_stream_collide -%} + + {{block_stream['kernel']|generate_field_strides()|indent(3)}} + + {%if target is equalto 'gpu' -%} + auto parallelSection_ = parallelStreams_.parallelSection( stream ); + for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ + if(it->second.empty()){ continue;} + + int64_t _size_0 = int64_c(it->second.size()); + int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first); + int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first); + int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first); + + {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = glPropagationPDFsGPU[level][it->first]; + dim3 _grid = glPropagationGrid_[level][it->first]; + dim3 _block = glPropagationBlock_[level][it->first]; + parallelSection_.run([&]( auto s ) { + {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(9)}} + }); + } + + {% else %} + + for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ + if(it->second.empty()){ continue;} + + int64_t _size_0 = int64_c(it->second.size()); + int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first); + int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first); + int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first); + + {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = it->second.data(); + {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(6)}} + } + {%- endif %} + + {%endif%} +} + +{% for kernel in kernels %} +void {{class_name}}::{{kernel['function_name']}}( {{kernel['kernel']|generate_plain_parameter_list(ghost_layers=True)}} ) +{ + {{kernel['kernel']|generate_call(ghost_layers_to_include=kernel['ghost_layers_to_include'], stream='stream')|indent(3)}} +} +void {{class_name}}::{{kernel['function_name']}}CellInterval( {{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci')}}) +{ + {{kernel['kernel']|generate_call(stream='stream', cell_interval='ci')|indent(3)}} +} +{% endfor %} + + +} // namespace {{namespace}} +} // namespace walberla + + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic pop +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL ) +#pragma warning pop +#endif diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h new file mode 100644 index 00000000..9471b6f2 --- /dev/null +++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h @@ -0,0 +1,511 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file {{class_name}}.h +//! \\author pystencils +//====================================================================================================================== + +#pragma once + +#include "blockforest/StructuredBlockForest.h" +#include "blockforest/BlockID.h" +#include "blockforest/Block.h" + +#include "core/DataTypes.h" +#include "core/logging/Logging.h" +#include "core/Macros.h" + +#include "field/AddToStorage.h" +#include "field/FlagField.h" +#include "field/iterators/FieldIterator.h" + +{% if target is equalto 'gpu' -%} +#include "gpu/AddGPUFieldToStorage.h" +#include "gpu/GPUField.h" +#include "gpu/ParallelStreams.h" +{%- endif %} + +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" +#include "domain_decomposition/StructuredBlockStorage.h" + +#include "field/SwapableCompare.h" +#include "field/GhostLayerField.h" + +#include "stencil/Directions.h" +#include "stencil/{{stencil_name}}.h" + +#include +#include + +{% for header in headers %} +#include {{header}} +{% endfor %} + +using namespace std::placeholders; + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +# pragma GCC diagnostic ignored "-Wreorder" +#endif + +namespace walberla { +namespace {{namespace}} { + +class {{class_name}} +{ + public: + enum Type { ALL = 0, INNER = 1, OUTER = 2 }; + using sizeTuple = std::tuple; + + {{class_name}}(const shared_ptr< StructuredBlockForest > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1)) + : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth) + { + + {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}} + validInnerOuterSplit_ = true; + + for (auto& iBlock : *blocks) + { + if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || + int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || + int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2) + validInnerOuterSplit_ = false; + } + } + + void initialiseBlockPointer() + { + {%if block_stream_collide -%} + blockWise_ = true; + + size_0.resize(blocks_->getNumberOfLevels()); + {%- for field in block_stream_collide['all_fields'] %} + {{field.name}}Pointers.resize(blocks_->getNumberOfLevels()); + {%if target is equalto 'gpu' -%} {{field.name}}PointersGPU.resize(blocks_->getNumberOfLevels()); {% endif %} + {%- endfor %} + + {%if target is equalto 'gpu' -%} block_.resize(blocks_->getNumberOfLevels()); {% endif %} + {%if target is equalto 'gpu' -%} grid_.resize(blocks_->getNumberOfLevels()); {% endif %} + + glPropagationPDFs.resize(blocks_->getNumberOfLevels()); + {%if target is equalto 'gpu' -%} glPropagationPDFsGPU.resize(blocks_->getNumberOfLevels()); {% endif %} + + {%if target is equalto 'gpu' -%} glPropagationBlock_.resize(blocks_->getNumberOfLevels()); {% endif %} + {%if target is equalto 'gpu' -%} glPropagationGrid_.resize(blocks_->getNumberOfLevels()); {% endif %} + + for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) + { + auto* local = dynamic_cast< Block* >(it.get()); + {%- for field in block_stream_collide['all_fields'] %} + auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID); + {%- endfor %} + + size_1 = int64_c({{block_stream_collide['all_fields'][0].name}}->xSize()); + size_2 = int64_c({{block_stream_collide['all_fields'][0].name}}->ySize()); + size_3 = int64_c({{block_stream_collide['all_fields'][0].name}}->zSize()); + + {%- for field in block_stream_collide['all_fields'] %} + + stride_{{field.name}}_0 = int64_c({{field.name}}->xStride()); + stride_{{field.name}}_1 = int64_c({{field.name}}->yStride()); + stride_{{field.name}}_2 = int64_c({{field.name}}->zStride()); + stride_{{field.name}}_3 = int64_c(1 * int64_c({{field.name}}->fStride())); + + {%- endfor %} + break; + } + + for( auto it = blocks_->begin(); it != blocks_->end(); ++it ) + { + auto* local = dynamic_cast< Block* >(it.get()); + const uint_t level = local->getLevel(); + {%- for field in block_stream_collide['all_fields'] %} + auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID); + {{field.name}}Pointers[level].emplace_back({{field.name}}->dataAt(0, 0, 0, 0)); + {%- endfor %} + + + for(auto dir = stencil::{{stencil_name}}::beginNoCenter(); dir != stencil::{{stencil_name}}::end(); ++dir){ + uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir); + // Propagate on ghost layers shadowing coarse or no blocks + if(local->neighborhoodSectionHasLargerBlock(nSecIdx)){ + CellInterval ci; + {{pdf_field.name}}->getGhostRegion(*dir, ci, 1); + sizeTuple dirTuple = std::make_tuple(int64_c(ci.xSize()), int64_c(ci.ySize()), int64_c(ci.zSize())); + glPropagationPDFs[level][dirTuple].emplace_back({{pdf_field.name}}->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0)); + } + } + } + + for (uint_t level = 0; level < blocks_->getNumberOfLevels(); level++) { + size_0[level] = int64_c({{pdf_field.name}}Pointers[level].size()); + + {%if target is equalto 'gpu' -%} + + int64_t indexingX = size_1 * size_0[level]; + int64_t indexingY = size_2; + int64_t indexingZ = size_3; + + int64_t cudaBlockSize0 = 128; + int64_t cudaBlockSize1 = 1; + int64_t cudaBlockSize2 = 1; + + block_[level] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))); + grid_[level] = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 ))); + + {%- for field in block_stream_collide['all_fields'] %} + + WALBERLA_GPU_CHECK(gpuMalloc( (void**)&{{field.name}}PointersGPU[level], sizeof({{pdf_field.dtype.c_name}}* ) * {{field.name}}Pointers[level].size() )); + WALBERLA_GPU_CHECK(gpuMemcpy( {{field.name}}PointersGPU[level], &{{field.name}}Pointers[level][0], sizeof({{pdf_field.dtype.c_name}} *) * {{field.name}}Pointers[level].size(), gpuMemcpyHostToDevice )); + + {%- endfor %} + + for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ + if(it->second.empty()){ continue;} + + indexingX = std::get<0>(it->first) * int64_c(it->second.size()); + indexingY = std::get<1>(it->first); + indexingZ = std::get<2>(it->first); + + cudaBlockSize0 = 32; + cudaBlockSize1 = 1; + cudaBlockSize2 = 1; + + glPropagationBlock_[level][it->first] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))); + glPropagationGrid_[level][it->first] = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 ))); + + WALBERLA_GPU_CHECK(gpuMalloc( (void**)&glPropagationPDFsGPU[level][it->first], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size() )) + WALBERLA_GPU_CHECK(gpuMemcpy( glPropagationPDFsGPU[level][it->first], &it->second[0], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size(), gpuMemcpyHostToDevice )) + } + + {% endif %} + } + {% endif %} + + }; + + {%if block_stream_collide -%} + ~{{class_name}}() { + {%if target is equalto 'gpu' -%} + for (uint_t level = 0; level < blocks_->getNumberOfLevels(); level++) + { + {%- for field in block_stream_collide['all_fields'] %} + if(!{{field.name}}Pointers[level].empty()){ + WALBERLA_GPU_CHECK(gpuFree({{field.name}}PointersGPU[level])) + } + {%- endfor %} + + for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ + if(it->second.empty()){ continue;} + WALBERLA_GPU_CHECK(gpuFree(glPropagationPDFsGPU[level][it->first])) + } + } + {%- endif %} + } + {% else %} + {{ kernel_list| generate_destructor(class_name) |indent(4) }} + {% endif %} + + + /************************************************************************************* + * Internal Function Definitions with raw Pointer + *************************************************************************************/ + + void blockStreamCollide({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); + void ghostLayerPropagation({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); + bool blockWise() {return blockWise_;}; + + {%- for kernel in kernels %} + static void {{kernel['function_name']}} ({{kernel['kernel']|generate_plain_parameter_list(ghost_layers=0, stream="nullptr")}}); + static void {{kernel['function_name']}}CellInterval ({{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci', stream="nullptr")}}); + {% endfor %} + + /************************************************************************************* + * Function Definitions for external Usage + *************************************************************************************/ + + std::function blockStreamCollideFck({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) + { + return [{{- ["this", "level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](){ + blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}); + }; + } + + void streamCollideOverBlocks({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) + { + blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}); + } + + + {%- for kernel in kernels %} + + std::function {{kernel['function_name']}}() + { + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); }; + } + + std::function {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}}) + { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + + switch (type) + { + case Type::INNER: + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); }; + case Type::OUTER: + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); }; + default: + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); }; + } + } + + std::function {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) + { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + + switch (type) + { + case Type::INNER: + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); }; + case Type::OUTER: + return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); }; + default: + return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); }; + } + } + + {% if target is equalto 'gpu' -%} + std::function {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) + { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + + switch (type) + { + case Type::INNER: + return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); }; + case Type::OUTER: + return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); }; + default: + return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); }; + } + } + + std::function {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}}) + { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + + switch (type) + { + case Type::INNER: + return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); }; + case Type::OUTER: + return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); }; + default: + return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); }; + } + } + {%- endif %} + + void {{kernel['function_name']}}({{- ["IBlock * block",] | type_identifier_list -}}) + { + const cell_idx_t ghost_layers = 0; + {% if target is equalto 'gpu' -%} + gpuStream_t gpuStream = nullptr; + {%- endif %} + + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements|indent(6)}} + {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}}); + {{kernel['kernel']|generate_swaps|indent(6)}} + } + + void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) + { + {% if target is equalto 'gpu' -%} + gpuStream_t gpuStream = nullptr; + {%- endif %} + + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements|indent(6)}} + {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}}); + {{kernel['kernel']|generate_swaps|indent(6)}} + } + + {% if target is equalto 'gpu' -%} + void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) + { + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements|indent(6)}} + {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}}); + {{kernel['kernel']|generate_swaps|indent(6)}} + } + {%- endif %} + + void {{kernel['function_name']}}CellInterval({{- ["IBlock * block", "const CellInterval & ci", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) + { + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements|indent(6)}} + {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}}); + {{kernel['kernel']|generate_swaps|indent(6)}} + } + + void {{kernel['function_name']}}Inner({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) + { + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements(advance=False)|indent(6)}} + + CellInterval inner = {{kernel['field']}}->xyzSize(); + inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2])); + + {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='inner')}}); + } + + void {{kernel['function_name']}}Outer({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) + { + + {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}} + {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}} + {{kernel['kernel']|generate_timestep_advancements|indent(6)}} + + if( layers_.empty() ) + { + CellInterval ci; + + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false); + layers_.push_back(ci); + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false); + layers_.push_back(ci); + + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false); + ci.expand(Cell(0, 0, -outerWidth_[2])); + layers_.push_back(ci); + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false); + ci.expand(Cell(0, 0, -outerWidth_[2])); + layers_.push_back(ci); + + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false); + ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); + layers_.push_back(ci); + {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false); + ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); + layers_.push_back(ci); + } + + {%if target is equalto 'gpu'%} + { + auto parallelSection_ = parallelStreams_.parallelSection( gpuStream ); + for( auto & ci: layers_ ) + { + parallelSection_.run([&]( auto s ) { + {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}}); + }); + } + } + {% else %} + for( auto & ci: layers_ ) + { + {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}}); + } + {% endif %} + + {{kernel['kernel']|generate_swaps|indent(9)}} + } + {% endfor %} + + {%if target is equalto 'gpu'%} + void setOuterPriority(int priority) + { + parallelStreams_.setStreamPriority(priority); + } + {%endif%} + + private: + shared_ptr< StructuredBlockForest > blocks_; + {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}} + + Cell outerWidth_; + std::vector layers_; + bool validInnerOuterSplit_; + bool blockWise_{false}; + + {%if target is equalto 'gpu' -%} + gpu::ParallelStreams parallelStreams_; + {%- endif %} + + {%if block_stream_collide -%} + + std::vector size_0; + + int64_t size_1; + int64_t size_2; + int64_t size_3; + + {%- for field in block_stream_collide['all_fields'] %} + int64_t stride_{{field.name}}_0; + int64_t stride_{{field.name}}_1; + int64_t stride_{{field.name}}_2; + int64_t stride_{{field.name}}_3; + {% endfor %} + + {%- for field in block_stream_collide['all_fields'] %} + std::vector> {{field.name}}Pointers; + {% endfor -%} + + std::vector>> glPropagationPDFs; + {%if target is equalto 'gpu' -%} + + + {%- for field in block_stream_collide['all_fields'] %} + std::vector<{{field.dtype.c_name}} **> {{field.name}}PointersGPU; + {% endfor -%} + + std::vector block_; + std::vector grid_; + + std::vector> glPropagationPDFsGPU; + + std::vector> glPropagationBlock_; + std::vector> glPropagationGrid_; + {%- endif %} + {%- endif %} +}; + + +} // namespace {{namespace}} +} // namespace walberla + + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic pop +#endif diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp index 92106cdd..9558d0d2 100644 --- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp +++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp @@ -19,12 +19,45 @@ #include "{{class_name}}.h" + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +#include "gpu/GPUWrapper.h" +#include "gpu/GPUField.h" +{%- endif %} + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + + #if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wfloat-equal" # pragma GCC diagnostic ignored "-Wshadow" # pragma GCC diagnostic ignored "-Wconversion" # pragma GCC diagnostic ignored "-Wunused-variable" +# pragma GCC diagnostic ignored "-Wignored-qualifiers" +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL ) +#pragma warning push +#pragma warning( disable : 1599 ) +#endif + +#ifdef __CUDACC__ +#pragma push +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diag_suppress 191 +#else +#pragma diag_suppress 191 +#endif #endif namespace walberla { @@ -42,6 +75,8 @@ namespace {{namespace}} { {{ kernels['localCopyDirection'] | generate_definitions }} {% if nonuniform -%} + {{ kernels['localCopyRedistribute'] | generate_definitions }} + {{ kernels['localPartialCoalescence'] | generate_definitions }} {{ kernels['unpackRedistribute'] | generate_definitions }} {{ kernels['packPartialCoalescence'] | generate_definitions }} {{ kernels['zeroCoalescenceRegion'] | generate_definitions }} @@ -89,7 +124,7 @@ namespace {{namespace}} { WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize()) {{kernels['localCopyAll'] - | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream') + | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} } @@ -115,6 +150,56 @@ namespace {{namespace}} { {{kernels['unpackDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }} } + void {{class_name}}::PackKernels::localCopyDirection( + {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp", + kernels['localCopyDirection'].kernel_selection_parameters, + ["gpuStream_t stream"] if is_gpu else [], "std::array& _sizes", "std::array& _strides"] + | type_identifier_list -}} + ) const { + {% if block_wise -%} + + {% if target is equalto 'gpu' -%} + + const int64_t indexingX = _sizes[0] * _sizes[1]; + const int64_t indexingY = _sizes[2]; + const int64_t indexingZ = _sizes[3]; + + const int64_t cudaBlockSize0 = 128; + const int64_t cudaBlockSize1 = 1; + const int64_t cudaBlockSize2 = 1; + + const int64_t _size_0 = _sizes[0]; + + const int64_t _size_{{src_field.name}}_0 = _sizes[1]; + const int64_t _size_{{src_field.name}}_1 = _sizes[2]; + const int64_t _size_{{src_field.name}}_2 = _sizes[3]; + const int64_t _size_{{dst_field.name}}_0 = _sizes[1]; + const int64_t _size_{{dst_field.name}}_1 = _sizes[2]; + const int64_t _size_{{dst_field.name}}_2 = _sizes[3]; + + const int64_t _stride_{{src_field.name}}_0 = _strides[0]; + const int64_t _stride_{{src_field.name}}_1 = _strides[1]; + const int64_t _stride_{{src_field.name}}_2 = _strides[2]; + const int64_t _stride_{{src_field.name}}_3 = _strides[3]; + + const int64_t _stride_{{dst_field.name}}_0 = _strides[0]; + const int64_t _stride_{{dst_field.name}}_1 = _strides[1]; + const int64_t _stride_{{dst_field.name}}_2 = _strides[2]; + const int64_t _stride_{{dst_field.name}}_3 = _strides[3]; + + const dim3 _block = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))); + const dim3 _grid = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 ))); + {%- endif %} + + {{kernels['localCopyDirection'] + | generate_call(plain_kernel_call=True, stream='stream') + | indent(6) }} + + {%else%} + WALBERLA_ABORT("Block wise local communication is not implemented") + {%- endif %} + } + void {{class_name}}::PackKernels::localCopyDirection( {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", @@ -123,16 +208,42 @@ namespace {{namespace}} { | type_identifier_list -}} ) const { + {% if not block_wise -%} WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize()) WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize()) WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize()) {{kernels['localCopyDirection'] - | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream') + | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} + + {%else%} + WALBERLA_ABORT("Local communication is only implemented block wise") + {%- endif %} } + {% if nonuniform -%} + void {{class_name}}::PackKernels::localCopyRedistribute( + {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters, + ["gpuStream_t stream"] if is_gpu else []] + | type_identifier_list -}} + ) const + { + {{kernels['localCopyRedistribute'] | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} + } + + void {{class_name}}::PackKernels::localPartialCoalescence( + {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & srcInterval", + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters, + ["gpuStream_t stream"] if is_gpu else []] + | type_identifier_list -}} + ) const + { + {{kernels['localPartialCoalescence'] | generate_call(cell_interval={src_field.name : 'srcInterval', mask_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream') | indent(6) }} + } + void {{class_name}}::PackKernels::unpackRedistribute( {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci", "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters, diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h index ad40a55e..e10644b6 100644 --- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h +++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h @@ -30,21 +30,13 @@ #include "stencil/{{stencil_name}}.h" #include "stencil/Directions.h" -{% if target is equalto 'cpu' -%} -#define FUNC_PREFIX -{%- elif target is equalto 'gpu' -%} -#define FUNC_PREFIX __global__ +{% if target is equalto 'gpu' -%} #include "gpu/GPUWrapper.h" #include "gpu/GPUField.h" {%- endif %} -#ifdef __GNUC__ -#define RESTRICT __restrict__ -#elif _MSC_VER -#define RESTRICT __restrict -#else -#define RESTRICT -#endif +#include + #if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG #pragma GCC diagnostic push @@ -120,6 +112,7 @@ class {{class_name}} {%- endif %} static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%}; + static const bool blockWise = {% if block_wise -%} true {%- else -%} false {%- endif -%}; /** * Packs all pdfs from the given cell interval to the send buffer. @@ -178,6 +171,13 @@ class {{class_name}} /** Copies data between two blocks on the same process. * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval. * */ + void localCopyDirection( + {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp", + kernels['localCopyDirection'].kernel_selection_parameters, + ["gpuStream_t stream"] if is_gpu else [], "std::array& _sizes", "std::array& _strides"] + | type_identifier_list -}} + ) const; + void localCopyDirection( {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", @@ -186,6 +186,7 @@ class {{class_name}} | type_identifier_list -}} ) const; + /** * Returns the number of bytes that will be packed from / unpacked to the cell interval * when using packDirection / unpackDirection @@ -209,6 +210,26 @@ class {{class_name}} {% if nonuniform -%} + /** + * Local uniform redistribute. + * */ + void localCopyRedistribute( + {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval", + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters, + ["gpuStream_t stream = nullptr"] if is_gpu else []] + | type_identifier_list -}} + ) const; + + /** + * Local partial coalescence. + * */ + void localPartialCoalescence( + {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & srcInterval", + "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters, + ["gpuStream_t stream = nullptr"] if is_gpu else []] + | type_identifier_list -}} + ) const; + /** * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid. * */ diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py index ea583181..7465a45b 100644 --- a/python/lbmpy_walberla/walberla_lbm_package.py +++ b/python/lbmpy_walberla/walberla_lbm_package.py @@ -3,7 +3,6 @@ from pystencils import Target, Field from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation -from lbmpy.relaxationrates import get_shear_relaxation_rate from pystencils_walberla.cmake_integration import CodeGenerationContext @@ -35,7 +34,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, cpu_openmp=cpu_openmp) if nonuniform: - omega = get_shear_relaxation_rate(method) + omega = lbm_config.relaxation_rate refinement_scaling = RefinementScaling() refinement_scaling.add_standard_relaxation_rate_scaling(omega) else: @@ -47,15 +46,15 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, macroscopic_fields=macroscopic_fields, target=target, data_type=data_type, cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, - max_threads=max_threads, - set_pre_collision_pdfs=set_pre_collision_pdfs, + max_threads=max_threads, set_pre_collision_pdfs=set_pre_collision_pdfs, **kernel_parameters) spatial_shape = None if lbm_optimisation.symbolic_field and lbm_optimisation.symbolic_field.has_fixed_shape: spatial_shape = lbm_optimisation.symbolic_field.spatial_shape + (lbm_config.stencil.Q, ) - generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries, - lb_method=method, field_name='pdfs', spatial_shape=spatial_shape, - streaming_pattern=lbm_config.streaming_pattern, - target=target, layout=lbm_optimisation.field_layout) + if boundaries is not None: + generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries, + lb_method=method, field_name='pdfs', spatial_shape=spatial_shape, + streaming_pattern=lbm_config.streaming_pattern, + target=target, layout=lbm_optimisation.field_layout) diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py index f78f7fcf..158a2945 100644 --- a/python/pystencils_walberla/__init__.py +++ b/python/pystencils_walberla/__init__.py @@ -1,4 +1,4 @@ -from .boundary import generate_staggered_boundary, generate_staggered_flux_boundary +from .boundary import generate_boundary, generate_staggered_boundary, generate_staggered_flux_boundary from .cmake_integration import CodeGeneration, ManualCodeGenerationContext from .function_generator import function_generator @@ -8,7 +8,7 @@ generate_pack_info_from_kernel, generate_mpidtype_info_from_kernel) from .utility import generate_info_header, get_vectorize_instruction_set, config_from_context -__all__ = ['generate_staggered_boundary', 'generate_staggered_flux_boundary', +__all__ = ['generate_boundary', 'generate_staggered_boundary', 'generate_staggered_flux_boundary', 'CodeGeneration', 'ManualCodeGenerationContext', 'function_generator', 'generate_sweep', 'generate_selective_sweep', 'generate_sweep_collection', diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py index 7af79ed6..6e4ff76b 100644 --- a/python/pystencils_walberla/boundary.py +++ b/python/pystencils_walberla/boundary.py @@ -67,8 +67,16 @@ def generate_boundary(generation_context, if not kernel_creation_function: kernel_creation_function = create_boundary_kernel - kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, - target=target, **create_kernel_params) + bc_force = hasattr(boundary_object, "calculate_force_on_boundary") and boundary_object.calculate_force_on_boundary + if bc_force: + force_vector_type = np.dtype([(f"F_{i}", np.float64) for i in range(dim)], align=True) + force_vector = Field('forceVector', FieldType.INDEXED, force_vector_type, layout=[0], + shape=(TypedSymbol("forceVectorSize", create_type("int32")), 1), strides=(1, 1)) + kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, + target=target, force_vector=force_vector, **create_kernel_params) + else: + kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, + target=target, **create_kernel_params) if isinstance(kernel, KernelFunction): kernel.function_name = f"boundary_{boundary_object.name}" @@ -103,7 +111,8 @@ def generate_boundary(generation_context, 'additional_data_handler': additional_data_handler, 'dtype': "double" if is_float else "float", 'layout': layout, - 'index_shape': index_shape + 'index_shape': index_shape, + 'calculate_force': bc_force } env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index 6d05bf8f..d83ed6bb 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -9,27 +9,24 @@ from pystencils import Target, Backend from pystencils.backends.cbackend import generate_c -from pystencils.typing import TypedSymbol, get_base_type +from pystencils.typing import TypedSymbol, get_base_type, PointerType from pystencils.field import FieldType from pystencils.sympyextensions import prod temporary_fieldPointerTemplate = """{type}""" -temporary_fieldMemberTemplate = """ -private: std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name}_;""" +temporary_fieldMemberTemplate = "std::unordered_map cache_{original_field_name}_;" temporary_fieldTemplate = """ {{ - // Getting temporary field {tmp_field_name} - auto it = cache_{original_field_name}_.find( {original_field_name} ); - if( it != cache_{original_field_name}_.end() ) + if (cache_{original_field_name}_.find(block) == cache_{original_field_name}_.end()) {{ - {tmp_field_name} = *it; + {tmp_field_name} = {original_field_name}->cloneUninitialized(); + cache_{original_field_name}_[block] = {tmp_field_name}; }} else {{ - {tmp_field_name} = {original_field_name}->cloneUninitialized(); - cache_{original_field_name}_.insert({tmp_field_name}); + {tmp_field_name} = cache_{original_field_name}_[block]; }} }} """ @@ -40,7 +37,7 @@ delete_loop = """ for(auto p: cache_{original_field_name}_) {{ - delete p; + delete p.second; }} """ @@ -226,17 +223,23 @@ def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_igno def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore=(), ignore_fields=False, - parameter_registration=None): + parameter_registration=None, level_known=False): + + pointer_symbols = {p.symbol.name for p in kernel_info.parameters + if not p.is_field_parameter and isinstance(p.symbol.dtype, PointerType)} symbols = {p.field_name for p in kernel_info.parameters if p.is_field_pointer and not ignore_fields} symbols.update(p.symbol.name for p in kernel_info.parameters if not p.is_field_parameter) symbols.difference_update(parameters_to_ignore) + if ignore_fields: + symbols.difference_update(pointer_symbols) type_information = {p.symbol.name: p.symbol.dtype for p in kernel_info.parameters if not p.is_field_parameter} result = [] registered_parameters = [] if not parameter_registration else parameter_registration.scaling_info for s in symbols: if s in registered_parameters: dtype = type_information[s].c_name - result.append("const uint_t level = block->getBlockStorage().getLevel(*block);") + if not level_known: + result.append("const uint_t level = block->getBlockStorage().getLevel(*block);") result.append(f"{dtype} & {s} = {s}Vector[level];") else: result.append(f"auto & {s} = {prefix}{s}_;") @@ -245,7 +248,7 @@ def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignor @jinja2_context_decorator def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, stream='0', - spatial_shape_symbols=()): + spatial_shape_symbols=(), plain_kernel_call=False): """Generates the function call to a pystencils kernel Args: @@ -265,6 +268,20 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st parameters - however in special cases like boundary conditions a manual specification may be necessary. """ + ast_params = kernel.parameters + if len(spatial_shape_symbols) == 0: + for param in ast_params: + if param.is_field_parameter and FieldType.is_indexed(param.fields[0]): + continue + if param.is_field_pointer: + field = param.fields[0] + if field.has_fixed_shape: + spatial_shape_symbols = field.spatial_shape + + if plain_kernel_call: + return kernel.generate_kernel_invocation_code(plain_kernel_call=True, stream=stream, + spatial_shape_symbols=spatial_shape_symbols) + assert isinstance(ghost_layers_to_include, str) or ghost_layers_to_include >= 0 ast_params = kernel.parameters vec_info = ctx.get('cpu_vectorize_info', None) @@ -296,7 +313,7 @@ def get_cell_interval(field_object): if isinstance(cell_interval, str): return cell_interval elif isinstance(cell_interval, dict): - return cell_interval[field_object] + return cell_interval[field_object.name] else: return None @@ -591,7 +608,8 @@ def generate_members(ctx, kernel_infos, parameters_to_ignore=None, only_fields=F original_field_name = field_name[:-len('_tmp')] f_size = get_field_fsize(f) field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu) - result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name)) + result.append(temporary_fieldMemberTemplate.format(type=field_type, + original_field_name=original_field_name)) for kernel_info in kernel_infos: if hasattr(kernel_info, 'varying_parameters'): @@ -734,6 +752,16 @@ def generate_constructor(ctx, kernel_infos, parameter_registration): return "\n".join(result) +@jinja2_context_decorator +def generate_field_strides(ctx, kernel_info): + result = [] + for param in kernel_info.parameters: + if param.is_field_stride: + type_str = param.symbol.dtype.c_name + result.append(f"const {type_str} {param.symbol.name} = {param.symbol.name[1:]};") + return "\n".join(result) + + def generate_list_of_expressions(expressions, prepend=''): if len(expressions) == 0: return '' @@ -806,3 +834,4 @@ def add_pystencils_filters_to_jinja_env(jinja_env): jinja_env.filters['identifier_list'] = identifier_list jinja_env.filters['list_of_expressions'] = generate_list_of_expressions jinja_env.filters['field_type'] = field_type + jinja_env.filters['generate_field_strides'] = generate_field_strides diff --git a/python/pystencils_walberla/kernel_info.py b/python/pystencils_walberla/kernel_info.py index 586c05ab..698b0511 100644 --- a/python/pystencils_walberla/kernel_info.py +++ b/python/pystencils_walberla/kernel_info.py @@ -32,13 +32,20 @@ def get_headers(self): all_headers = [list(get_headers(self.ast))] return reduce(merge_sorted_lists, all_headers) - def generate_kernel_invocation_code(self, **kwargs): + def generate_kernel_invocation_code(self, plain_kernel_call=False, **kwargs): ast = self.ast ast_params = self.parameters fnc_name = ast.function_name is_cpu = self.ast.target == Target.CPU call_parameters = ", ".join([p.symbol.name for p in ast_params]) + if plain_kernel_call: + if is_cpu: + return f"internal_{fnc_name}::{fnc_name}({call_parameters});" + else: + stream = kwargs.get('stream', '0') + return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});" + if not is_cpu: stream = kwargs.get('stream', '0') spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ()) diff --git a/python/pystencils_walberla/kernel_selection.py b/python/pystencils_walberla/kernel_selection.py index ad8a9986..3195624f 100644 --- a/python/pystencils_walberla/kernel_selection.py +++ b/python/pystencils_walberla/kernel_selection.py @@ -157,6 +157,21 @@ def case_code(case, subtree): return switch_code +class AbortNode(AbstractKernelSelectionNode): + def __init__(self, message): + self.message = message + + @property + def selection_parameters(self): + return set() + + def collect_kernel_calls(self): + return set() + + def get_code(self, **kwargs): + return f'WALBERLA_ABORT("{self.message}")' + + class KernelCallNode(AbstractKernelSelectionNode): def __init__(self, ast): self.ast = ast @@ -169,13 +184,20 @@ def selection_parameters(self) -> Set[TypedSymbol]: def collect_kernel_calls(self): return {self} - def get_code(self, **kwargs): + def get_code(self, plain_kernel_call=False, **kwargs): ast = self.ast ast_params = self.parameters fnc_name = ast.function_name is_cpu = self.ast.target == Target.CPU call_parameters = ", ".join([p.symbol.name for p in ast_params]) + if plain_kernel_call: + if is_cpu: + return f"internal_{fnc_name}::{fnc_name}({call_parameters});" + else: + stream = kwargs.get('stream', '0') + return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});" + if not is_cpu: stream = kwargs.get('stream', '0') spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ()) diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp index 644202ba..3ece9a91 100644 --- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp +++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp @@ -82,10 +82,23 @@ void {{class_name}}::run_impl( uint8_t * _data_indexVector = reinterpret_cast(pointer); - {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}} + {% if calculate_force -%} + auto * forceVector = block->getData(forceVectorID); + WALBERLA_ASSERT_EQUAL(indexVectorSize, int32_c( forceVector->forceVector().size() )) + + {% if target == 'gpu' -%} + auto forcePointer = forceVector->pointerGpu(); + int32_t forceVectorSize = int32_c( forceVector->forceVector().size() ); + {% else %} + auto forcePointer = forceVector->pointerCpu(); + {% endif %} + uint8_t * _data_forceVector = reinterpret_cast(forcePointer); + {%- endif %} + + {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])|indent(4)}} {{kernel|generate_timestep_advancements|indent(4)}} - {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }} - {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}} + {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize', 'forceVectorSize'], ignore_fields=True)|indent(4) }} + {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize', 'forceVectorSize'], stream='stream')|indent(4)}} } void {{class_name}}::run( diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h index 704a7227..43fd409e 100644 --- a/python/pystencils_walberla/templates/Boundary.tmpl.h +++ b/python/pystencils_walberla/templates/Boundary.tmpl.h @@ -110,6 +110,9 @@ class {{class_name}} {%- endif %} } + + + private: std::vector cpuVectors_{NUM_TYPES}; @@ -119,12 +122,88 @@ class {{class_name}} {%- endif %} }; + {% if calculate_force -%} + + struct ForceStruct { + double F_0; + double F_1; + double F_2; + ForceStruct() : F_0(double_c(0.0)), F_1(double_c(0.0)), F_2(double_c(0.0)) {} + bool operator==(const ForceStruct & o) const { + return floatIsEqual(F_0, o.F_0) && floatIsEqual(F_1, o.F_1) && floatIsEqual(F_2, o.F_2); + } + }; + + class ForceVector + { + public: + ForceVector() = default; + bool operator==(ForceVector const &other) const { return other.cpuVector_ == cpuVector_; } + + {% if target == 'gpu' -%} + ~ForceVector() {if(!gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] ))}} + {% endif -%} + + std::vector & forceVector() { return cpuVector_; } + ForceStruct * pointerCpu() { return cpuVector_.data(); } + bool empty() {return cpuVector_.empty();} + + {% if target == 'gpu' -%} + ForceStruct * pointerGpu() { return gpuVector_[0]; } + {% endif -%} + + Vector3 getForce() + { + syncCPU(); + Vector3 result(double_c(0.0)); + for(std::vector::iterator it = cpuVector_.begin(); it != cpuVector_.end(); ++it) + { + result[0] += it->F_0; + result[1] += it->F_1; + result[2] += it->F_2; + } + return result; + } + + void syncGPU() + { + {% if target == 'gpu' -%} + if(!gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] ))} + if(!cpuVector_.empty()) + { + gpuVector_.resize(cpuVector_.size()); + WALBERLA_GPU_CHECK(gpuMalloc(&gpuVector_[0], sizeof(ForceStruct) * cpuVector_.size())) + WALBERLA_GPU_CHECK(gpuMemcpy(gpuVector_[0], &cpuVector_[0], sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyHostToDevice)) + } + {%- endif %} + } + + void syncCPU() + { + {% if target == 'gpu' -%} + WALBERLA_GPU_CHECK(gpuMemcpy( &cpuVector_[0], gpuVector_[0] , sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyDeviceToHost )) + {%- endif %} + } + + private: + std::vector cpuVector_; + {% if target == 'gpu' -%} + std::vector gpuVector_; + {%- endif %} + }; + + {%- endif %} + {{class_name}}( const shared_ptr & blocks, - {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}}) - :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} + {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])}}{{additional_data_handler.constructor_arguments}}) + :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize']) }} { auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }; indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}"); + {% if calculate_force -%} + auto createForceVector = []( IBlock * const , StructuredBlockStorage * const ) { return new ForceVector(); }; + forceVectorID = blocks->addStructuredBlockData< ForceVector >( createForceVector, "forceVector_{{class_name}}"); + {%- endif %} }; void run ( @@ -148,6 +227,19 @@ class {{class_name}} {{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ); + Vector3 getForce(IBlock * {% if calculate_force -%}block{%else%}/*block*/{%- endif %}) + { + {% if calculate_force -%} + auto * forceVector = block->getData(forceVectorID); + if(forceVector->empty()) + return Vector3(double_c(0.0)); + return forceVector->getForce(); + {% else %} + WALBERLA_ABORT("Boundary condition was not generated including force calculation.") + return Vector3(double_c(0.0)); + {%- endif %} + } + std::function getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) { return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ] @@ -186,6 +278,9 @@ class {{class_name}} auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); + {% if calculate_force -%} + auto * forceVector = block->getData< ForceVector > ( forceVectorID ); + {%- endif %} auto * flagField = block->getData< FlagField_T > ( flagFieldID ); {{additional_data_handler.additional_field_data|indent(4)}} @@ -300,6 +395,10 @@ class {{class_name}} {% endif %} indexVectors->syncGPU(); + {% if calculate_force -%} + forceVector->forceVector().resize(indexVectorAll.size()); + forceVector->syncGPU(); + {%- endif %} } private: @@ -310,9 +409,12 @@ class {{class_name}} ); BlockDataID indexVectorID; + {% if calculate_force -%} + BlockDataID forceVectorID; + {%- endif %} {{additional_data_handler.additional_member_variable|indent(4)}} public: - {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} + {{kernel|generate_members(('indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'))|indent(4)}} }; diff --git a/src/blockforest/AABBRefinementSelection.h b/src/blockforest/AABBRefinementSelection.h index 45847dca..f92b062d 100644 --- a/src/blockforest/AABBRefinementSelection.h +++ b/src/blockforest/AABBRefinementSelection.h @@ -46,7 +46,7 @@ class AABBRefinementSelection { if( configBlock ) { - auto refinementBlock = configBlock.getBlock( "AABBRefinementSelection" ); + auto refinementBlock = configBlock.getKey() == "AABBRefinementSelection" ? configBlock : configBlock.getBlock( "AABBRefinementSelection" ); if( refinementBlock ) { diff --git a/src/blockforest/communication/UniformBufferedScheme.h b/src/blockforest/communication/UniformBufferedScheme.h index 1693d0d6..e7cbd6c6 100644 --- a/src/blockforest/communication/UniformBufferedScheme.h +++ b/src/blockforest/communication/UniformBufferedScheme.h @@ -158,9 +158,9 @@ class UniformBufferedScheme void startCommunication(); void wait(); + std::function getCommunicateFunctor(); std::function getStartCommunicateFunctor(); std::function getWaitFunctor(); - std::function getCommunicateFunctor(); //@} //******************************************************************************************************************* @@ -535,21 +535,21 @@ void UniformBufferedScheme::localBufferUnpacking( const uint_t index, c } template< typename Stencil > -std::function UniformBufferedScheme::getStartCommunicateFunctor() +std::function UniformBufferedScheme::getCommunicateFunctor() { - return std::bind( &UniformBufferedScheme::startCommunication, this ); + return std::bind( &UniformBufferedScheme::communicate, this ); } template< typename Stencil > -std::function UniformBufferedScheme::getWaitFunctor() +std::function UniformBufferedScheme::getStartCommunicateFunctor() { - return std::bind( &UniformBufferedScheme::wait, this ); + return std::bind( &UniformBufferedScheme::startCommunication, this ); } template< typename Stencil > -std::function UniformBufferedScheme::getCommunicateFunctor() +std::function UniformBufferedScheme::getWaitFunctor() { - return std::bind( &UniformBufferedScheme::communicate, this ); + return std::bind( &UniformBufferedScheme::wait, this ); } } // namespace communication diff --git a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h index a5dc8d8c..de398e65 100644 --- a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h +++ b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h @@ -46,6 +46,8 @@ class GeneratedNonUniformGPUPackInfo inline void packDataEqualLevel( const Block * sender, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const; virtual void unpackDataEqualLevel( Block * receiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) = 0; + virtual void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) = 0; + virtual void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) = 0; virtual void communicateLocalEqualLevel( const Block * sender, Block * receiver, stencil::Direction dir, gpuStream_t stream) = 0; inline void packDataCoarseToFine ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const; @@ -63,6 +65,7 @@ class GeneratedNonUniformGPUPackInfo virtual uint_t sizeCoarseToFineReceive ( Block* fineReceiver, stencil::Direction dir) = 0; virtual uint_t sizeFineToCoarseSend ( const Block * fineSender, stencil::Direction dir) = 0; + virtual void sync() = 0; #ifndef NDEBUG void clearBufferSizeCheckMap() { bufferSize_.clear(); } diff --git a/src/gpu/communication/NonUniformGPUScheme.h b/src/gpu/communication/NonUniformGPUScheme.h index b872be1d..ddd9fafe 100644 --- a/src/gpu/communication/NonUniformGPUScheme.h +++ b/src/gpu/communication/NonUniformGPUScheme.h @@ -90,6 +90,8 @@ class NonUniformGPUScheme inline void waitCommunicateCoarseToFine(uint_t fineLevel); inline void waitCommunicateFineToCoarse(uint_t fineLevel); + inline void setTimestepForLevel(uint_t level, uint8_t timestep) {timestepPerLevel_[level] = timestep;} + private: void setupCommunication(); @@ -128,6 +130,7 @@ class NonUniformGPUScheme Set< SUID > incompatibleBlockSelectors_; gpuStream_t streams_[Stencil::Q]; + std::vector< uint8_t > timestepPerLevel_; }; template< typename Stencil > @@ -196,6 +199,7 @@ void NonUniformGPUScheme< Stencil >::refresh() WALBERLA_CHECK_NOT_NULLPTR(forest, "Trying to access communication for a block storage object that doesn't exist anymore") const uint_t levels = forest->getNumberOfLevels(); + timestepPerLevel_.resize(levels); for (uint_t i = 0; i != 3; ++i) { @@ -313,6 +317,13 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i for (auto it : headers_[EQUAL_LEVEL][index]) bufferSystemGPU_[EQUAL_LEVEL][index].sendBuffer(it.first).clear(); + // If localCommunication is generated blockwise it is executed here. + for (auto level : participatingLevels){ + for (auto& pi : packInfos_){ + pi->communicateLocalEqualLevel(level, timestepPerLevel_[level], streams_[0]); + } + } + // Start filling send buffers for (auto& iBlock : *forest) { @@ -369,13 +380,12 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i } } } + // wait for packing to finish - for (uint_t i = 0; i < Stencil::Q; ++i) - { + for (uint_t i = 0; i < Stencil::Q; ++i){ WALBERLA_GPU_CHECK(gpuStreamSynchronize(streams_[i])) } - if (sendFromGPU_) bufferSystemGPU_[EQUAL_LEVEL][index].sendAll(); else @@ -836,8 +846,14 @@ void NonUniformGPUScheme< Stencil >::setupCommunication() if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_, incompatibleBlockSelectors_)) continue; - if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) ) + + if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) ){ + auto receiverBlock = dynamic_cast< Block * >( forest->getBlock( block->getNeighborId( neighborIdx, uint_t(0) )) ); + for (auto& pi : packInfos_){ + pi->addForLocalEqualLevelComm(block, receiverBlock, *dir); + } continue; + } const BlockID& receiverId = block->getNeighborId(neighborIdx, uint_t(0)); auto nProcess = mpi::MPIRank(block->getNeighborProcess(neighborIdx, uint_t(0))); @@ -915,6 +931,10 @@ void NonUniformGPUScheme< Stencil >::setupCommunication() } } + for (auto& pi : packInfos_){ + pi->sync(); + } + for (uint_t i = 0; i != 3; ++i) { for (uint_t j = 0; j <= levels; ++j) diff --git a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h index eb1a23fb..cca61653 100644 --- a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h +++ b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h @@ -22,6 +22,8 @@ #include "core/DataTypes.h" #include "domain_decomposition/IBlock.h" + + #include "OutflowD3Q19.h" #include "FixedDensityD3Q19.h" #include "FreeSlipD3Q19.h" @@ -43,11 +45,11 @@ class D3Q19BoundaryCollection D3Q19BoundaryCollection(const shared_ptr & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z) : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_) { - OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID); - FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID, density); - FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID); - NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID); - UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID, u_x, u_y, u_z); + OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID_); + FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID_, density); + FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID_); + NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID_); + UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID_, u_x, u_y, u_z); OutflowD3Q19Object->fillFromFlagField(blocks, flagID, walberla::FlagUID("Outflow"), domainUID); @@ -120,4 +122,4 @@ class D3Q19BoundaryCollection }; } -} +} \ No newline at end of file diff --git a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h index 3428689b..cb7a039d 100644 --- a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h +++ b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h @@ -22,6 +22,8 @@ #include "core/DataTypes.h" #include "domain_decomposition/IBlock.h" + + #include "OutflowD3Q27.h" #include "FixedDensityD3Q27.h" #include "FreeSlipD3Q27.h" @@ -43,11 +45,11 @@ class D3Q27BoundaryCollection D3Q27BoundaryCollection(const shared_ptr & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z) : blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_) { - OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID); - FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID, density); - FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID); - NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID); - UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID, u_x, u_y, u_z); + OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID_); + FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID_, density); + FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID_); + NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID_); + UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID_, u_x, u_y, u_z); OutflowD3Q27Object->fillFromFlagField(blocks, flagID, walberla::FlagUID("Outflow"), domainUID); @@ -120,4 +122,4 @@ class D3Q27BoundaryCollection }; } -} +} \ No newline at end of file diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp index e449704f..9bfdd22e 100644 --- a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp +++ b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_fixeddensityd3q19_even { -static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize) +static FUNC_PREFIX void fixeddensityd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; @@ -59,29 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _d const double delta_rho = rho - 1.0; for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3]; const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3]; const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3]; - const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; - const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3]; - const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3]; - const double u0Mu1 = u_0 + u_1*-1.0; + const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; + const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; + const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3]; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; - const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0)))))))))))))))); + const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2; + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0)))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.h b/src/lbm_generated/boundary/FixedDensityD3Q19.h index b4575d18..2b6fa163 100644 --- a/src/lbm_generated/boundary/FixedDensityD3Q19.h +++ b/src/lbm_generated/boundary/FixedDensityD3Q19.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp index 3ff43bc5..f8ccd833 100644 --- a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp +++ b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_fixeddensityd3q27_even { -static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize) +static FUNC_PREFIX void fixeddensityd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; @@ -59,28 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _d const double delta_rho = rho - 1.0; for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3]; const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3]; const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3]; - const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; - const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3]; - const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3]; - const double u0Mu1 = u_0 + u_1*-1.0; + const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; + const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3]; + const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3]; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0))))))))))))))))))))))); + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0))))))))))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.h b/src/lbm_generated/boundary/FixedDensityD3Q27.h index 359540d2..8efeb95c 100644 --- a/src/lbm_generated/boundary/FixedDensityD3Q27.h +++ b/src/lbm_generated/boundary/FixedDensityD3Q27.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp index 2e3dc465..3bee7fa2 100644 --- a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp +++ b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_freeslipd3q19_even { -static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void freeslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; @@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_ for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))]; + const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))]; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.h b/src/lbm_generated/boundary/FreeSlipD3Q19.h index 4679ffc4..1c436dd3 100644 --- a/src/lbm_generated/boundary/FreeSlipD3Q19.h +++ b/src/lbm_generated/boundary/FreeSlipD3Q19.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { @@ -253,7 +258,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = -1; element.wnz = 0; - ref_dir = 1; + ref_dir = 2; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -301,7 +306,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = 1; element.wnz = 0; - ref_dir = 2; + ref_dir = 1; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -349,7 +354,7 @@ class FreeSlipD3Q19 element.wnx = 1; element.wny = 0; element.wnz = 0; - ref_dir = 3; + ref_dir = 4; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -397,7 +402,7 @@ class FreeSlipD3Q19 element.wnx = -1; element.wny = 0; element.wnz = 0; - ref_dir = 4; + ref_dir = 3; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -445,7 +450,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = 0; element.wnz = -1; - ref_dir = 5; + ref_dir = 6; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -493,7 +498,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = 0; element.wnz = 1; - ref_dir = 6; + ref_dir = 5; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -541,7 +546,7 @@ class FreeSlipD3Q19 element.wnx = 1; element.wny = -1; element.wnz = 0; - ref_dir = 7; + ref_dir = 10; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -589,7 +594,7 @@ class FreeSlipD3Q19 element.wnx = -1; element.wny = -1; element.wnz = 0; - ref_dir = 8; + ref_dir = 9; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -637,7 +642,7 @@ class FreeSlipD3Q19 element.wnx = 1; element.wny = 1; element.wnz = 0; - ref_dir = 9; + ref_dir = 8; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -685,7 +690,7 @@ class FreeSlipD3Q19 element.wnx = -1; element.wny = 1; element.wnz = 0; - ref_dir = 10; + ref_dir = 7; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -733,7 +738,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = -1; element.wnz = -1; - ref_dir = 11; + ref_dir = 16; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -781,7 +786,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = 1; element.wnz = -1; - ref_dir = 12; + ref_dir = 15; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -829,7 +834,7 @@ class FreeSlipD3Q19 element.wnx = 1; element.wny = 0; element.wnz = -1; - ref_dir = 13; + ref_dir = 18; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -877,7 +882,7 @@ class FreeSlipD3Q19 element.wnx = -1; element.wny = 0; element.wnz = -1; - ref_dir = 14; + ref_dir = 17; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -925,7 +930,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = -1; element.wnz = 1; - ref_dir = 15; + ref_dir = 12; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -973,7 +978,7 @@ class FreeSlipD3Q19 element.wnx = 0; element.wny = 1; element.wnz = 1; - ref_dir = 16; + ref_dir = 11; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1021,7 +1026,7 @@ class FreeSlipD3Q19 element.wnx = 1; element.wny = 0; element.wnz = 1; - ref_dir = 17; + ref_dir = 14; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1069,7 +1074,7 @@ class FreeSlipD3Q19 element.wnx = -1; element.wny = 0; element.wnz = 1; - ref_dir = 18; + ref_dir = 13; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp index 3364610e..e71e3949 100644 --- a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp +++ b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_freeslipd3q27_even { -static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void freeslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; @@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_ for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))]; + const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))]; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.h b/src/lbm_generated/boundary/FreeSlipD3Q27.h index 562dfbca..759f7910 100644 --- a/src/lbm_generated/boundary/FreeSlipD3Q27.h +++ b/src/lbm_generated/boundary/FreeSlipD3Q27.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { @@ -253,7 +258,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = -1; element.wnz = 0; - ref_dir = 1; + ref_dir = 2; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -301,7 +306,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = 1; element.wnz = 0; - ref_dir = 2; + ref_dir = 1; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -349,7 +354,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 0; element.wnz = 0; - ref_dir = 3; + ref_dir = 4; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -397,7 +402,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 0; element.wnz = 0; - ref_dir = 4; + ref_dir = 3; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -445,7 +450,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = 0; element.wnz = -1; - ref_dir = 5; + ref_dir = 6; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -493,7 +498,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = 0; element.wnz = 1; - ref_dir = 6; + ref_dir = 5; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -541,7 +546,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = -1; element.wnz = 0; - ref_dir = 7; + ref_dir = 10; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -589,7 +594,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = -1; element.wnz = 0; - ref_dir = 8; + ref_dir = 9; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -637,7 +642,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 1; element.wnz = 0; - ref_dir = 9; + ref_dir = 8; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -685,7 +690,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 1; element.wnz = 0; - ref_dir = 10; + ref_dir = 7; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -733,7 +738,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = -1; element.wnz = -1; - ref_dir = 11; + ref_dir = 16; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -781,7 +786,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = 1; element.wnz = -1; - ref_dir = 12; + ref_dir = 15; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -829,7 +834,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 0; element.wnz = -1; - ref_dir = 13; + ref_dir = 18; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -877,7 +882,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 0; element.wnz = -1; - ref_dir = 14; + ref_dir = 17; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -925,7 +930,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = -1; element.wnz = 1; - ref_dir = 15; + ref_dir = 12; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -973,7 +978,7 @@ class FreeSlipD3Q27 element.wnx = 0; element.wny = 1; element.wnz = 1; - ref_dir = 16; + ref_dir = 11; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1021,7 +1026,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 0; element.wnz = 1; - ref_dir = 17; + ref_dir = 14; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1069,7 +1074,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 0; element.wnz = 1; - ref_dir = 18; + ref_dir = 13; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1117,7 +1122,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = -1; element.wnz = -1; - ref_dir = 19; + ref_dir = 26; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1165,7 +1170,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = -1; element.wnz = -1; - ref_dir = 20; + ref_dir = 25; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1213,7 +1218,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 1; element.wnz = -1; - ref_dir = 21; + ref_dir = 24; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1261,7 +1266,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 1; element.wnz = -1; - ref_dir = 22; + ref_dir = 23; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1309,7 +1314,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = -1; element.wnz = 1; - ref_dir = 23; + ref_dir = 22; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1357,7 +1362,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = -1; element.wnz = 1; - ref_dir = 24; + ref_dir = 21; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1405,7 +1410,7 @@ class FreeSlipD3Q27 element.wnx = -1; element.wny = 1; element.wnz = 1; - ref_dir = 25; + ref_dir = 20; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); @@ -1453,7 +1458,7 @@ class FreeSlipD3Q27 element.wnx = 1; element.wny = 1; element.wnz = 1; - ref_dir = 26; + ref_dir = 19; } element.ref_dir = ref_dir; indexVectorAll.push_back( element ); diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.cpp b/src/lbm_generated/boundary/NoSlipD3Q19.cpp index 268cbf43..b56a975a 100644 --- a/src/lbm_generated/boundary/NoSlipD3Q19.cpp +++ b/src/lbm_generated/boundary/NoSlipD3Q19.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_noslipd3q19_even { -static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void noslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; @@ -57,15 +57,16 @@ static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_in for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.h b/src/lbm_generated/boundary/NoSlipD3Q19.h index 933108ee..7541e7a9 100644 --- a/src/lbm_generated/boundary/NoSlipD3Q19.h +++ b/src/lbm_generated/boundary/NoSlipD3Q19.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.cpp b/src/lbm_generated/boundary/NoSlipD3Q27.cpp index c38bee81..9adee224 100644 --- a/src/lbm_generated/boundary/NoSlipD3Q27.cpp +++ b/src/lbm_generated/boundary/NoSlipD3Q27.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_noslipd3q27_even { -static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void noslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; @@ -57,16 +57,16 @@ static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_in for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.h b/src/lbm_generated/boundary/NoSlipD3Q27.h index 56bbfb06..de0a6a35 100644 --- a/src/lbm_generated/boundary/NoSlipD3Q27.h +++ b/src/lbm_generated/boundary/NoSlipD3Q27.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/boundary/OutflowD3Q19.cpp b/src/lbm_generated/boundary/OutflowD3Q19.cpp index d42cf904..13e14a59 100644 --- a/src/lbm_generated/boundary/OutflowD3Q19.cpp +++ b/src/lbm_generated/boundary/OutflowD3Q19.cpp @@ -45,13 +45,13 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_outflowd3q19_even { -static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void outflowd3q19_even(uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { - const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; + const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 }; const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 }; const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; @@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT _data_indexV for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); - const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16])); + const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); + const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16])); _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter; - *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]]; - *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter; + *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]]; + *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/OutflowD3Q19.h b/src/lbm_generated/boundary/OutflowD3Q19.h index bb299996..bcde26bf 100644 --- a/src/lbm_generated/boundary/OutflowD3Q19.h +++ b/src/lbm_generated/boundary/OutflowD3Q19.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { @@ -174,8 +179,8 @@ class OutflowD3Q19 if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 4 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -192,8 +197,8 @@ class OutflowD3Q19 if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 8 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -210,8 +215,8 @@ class OutflowD3Q19 if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 10 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -228,8 +233,8 @@ class OutflowD3Q19 if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 14 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -246,8 +251,8 @@ class OutflowD3Q19 if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 18 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); diff --git a/src/lbm_generated/boundary/OutflowD3Q27.cpp b/src/lbm_generated/boundary/OutflowD3Q27.cpp index 8ec9a490..70926d56 100644 --- a/src/lbm_generated/boundary/OutflowD3Q27.cpp +++ b/src/lbm_generated/boundary/OutflowD3Q27.cpp @@ -45,13 +45,13 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_outflowd3q27_even { -static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) +static FUNC_PREFIX void outflowd3q27_even(uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize) { - const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; + const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 }; const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 }; const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; @@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT _data_indexV for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); - const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16])); + const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12])); + const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16])); _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter; - *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]]; - *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter; + *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]]; + *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif diff --git a/src/lbm_generated/boundary/OutflowD3Q27.h b/src/lbm_generated/boundary/OutflowD3Q27.h index 53b4e4ba..e62365e4 100644 --- a/src/lbm_generated/boundary/OutflowD3Q27.h +++ b/src/lbm_generated/boundary/OutflowD3Q27.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { @@ -174,8 +179,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 4 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -192,8 +197,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 8 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -210,8 +215,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 10 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -228,8 +233,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 14 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -246,8 +251,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 18 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -264,8 +269,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 19 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -282,8 +287,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 21 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -300,8 +305,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 23 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -318,8 +323,8 @@ class OutflowD3Q27 if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) ) { auto element = IndexInfo(it.x(), it.y(), it.z(), 25 ); - element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20); - element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20); + element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) ); + element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) ); indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); diff --git a/src/lbm_generated/boundary/UBBD3Q19.cpp b/src/lbm_generated/boundary/UBBD3Q19.cpp index 0a88d2fe..7de5a364 100644 --- a/src/lbm_generated/boundary/UBBD3Q19.cpp +++ b/src/lbm_generated/boundary/UBBD3Q19.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_ubbd3q19_even { -static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z) +static FUNC_PREFIX void ubbd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 }; @@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 }; - const double weights [] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778}; + const double weights [] = {((double)(0.33333333333333333)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778))}; @@ -66,15 +66,16 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -101,8 +102,8 @@ void UBBD3Q19::run_impl(IBlock * block, IndexVectors::Type type) uint8_t timestep = pdfs->getTimestep(); auto & u_y = u_y_; - auto & u_x = u_x_; auto & u_z = u_z_; + auto & u_x = u_x_; WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers())) double * RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); diff --git a/src/lbm_generated/boundary/UBBD3Q19.h b/src/lbm_generated/boundary/UBBD3Q19.h index f57bac12..592d0000 100644 --- a/src/lbm_generated/boundary/UBBD3Q19.h +++ b/src/lbm_generated/boundary/UBBD3Q19.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/boundary/UBBD3Q27.cpp b/src/lbm_generated/boundary/UBBD3Q27.cpp index 08ee3ef3..b2cc6655 100644 --- a/src/lbm_generated/boundary/UBBD3Q27.cpp +++ b/src/lbm_generated/boundary/UBBD3Q27.cpp @@ -45,9 +45,9 @@ namespace lbm { #pragma diag_suppress 177 #endif #endif - +//NOLINTBEGIN(readability-non-const-parameter*) namespace internal_ubbd3q27_even { -static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z) +static FUNC_PREFIX void ubbd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z) { const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 }; @@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 }; - const double weights [] = {0.29629629629629630, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296}; + const double weights [] = {((double)(0.29629629629629630)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296))}; @@ -66,16 +66,16 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1) { - const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); - const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); - const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); - const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; + const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0])); + const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4])); + const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8])); + const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12])); + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir]; } } } - +//NOLINTEND(readability-non-const-parameter*) #ifdef __GNUC__ #pragma GCC diagnostic pop #endif @@ -102,8 +102,8 @@ void UBBD3Q27::run_impl(IBlock * block, IndexVectors::Type type) uint8_t timestep = pdfs->getTimestep(); auto & u_y = u_y_; - auto & u_x = u_x_; auto & u_z = u_z_; + auto & u_x = u_x_; WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers())) double * RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0); const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); diff --git a/src/lbm_generated/boundary/UBBD3Q27.h b/src/lbm_generated/boundary/UBBD3Q27.h index b7836d69..9ce7c3eb 100644 --- a/src/lbm_generated/boundary/UBBD3Q27.h +++ b/src/lbm_generated/boundary/UBBD3Q27.h @@ -19,6 +19,7 @@ #pragma once #include "core/DataTypes.h" +#include "core/logging/Logging.h" #include "field/GhostLayerField.h" #include "domain_decomposition/BlockDataID.h" @@ -40,6 +41,10 @@ #define RESTRICT #endif +#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT +using walberla::half; +#endif + namespace walberla { namespace lbm { diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h index 1b3e43a5..b74539e9 100644 --- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h +++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h @@ -54,6 +54,12 @@ class NonuniformPackingKernelsWrapper void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField, CellInterval& dstInterval, Direction dir) const = 0; + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir) const = 0; + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0; + void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const = 0; @@ -110,6 +116,18 @@ class NonuniformPackingKernelsWrapper< PdfField_T, false > kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir); } + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir) const + { + kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir); + } + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir) const + { + kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir); + } + void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const { @@ -194,10 +212,33 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true > kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep); } + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir) const + { + uint8_t timestep = srcField->getTimestep(); + WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must " + "be on an odd timestep, while the source field could either be " + "on an even or an odd state.") + kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep); + } + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir) const + { + uint8_t timestep = dstField->getTimestep(); + WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " + "be on an even timestep, while the source field could either be " + "on an even or an odd state.") + kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep); + } + void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer, stencil::Direction dir) const { uint8_t timestep = dstField->getTimestep(); + WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must " + "be on an odd timestep, while the source field could either be " + "on an even or an odd state.") kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep); } @@ -205,6 +246,9 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true > unsigned char* outBuffer, Direction dir) const { uint8_t timestep = srcField->getTimestep(); + WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " + "be on an even timestep, while the source field could either be " + "on an even or an odd state.") kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep); } diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h index cf36a61f..7c45d93d 100644 --- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h +++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h @@ -167,19 +167,19 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine( Direction unpackDir = dstIntervals[index].first; CellInterval dstInterval = dstIntervals[index].second; - uint_t packSize = kernels_.size(srcInterval); - #ifndef NDEBUG Direction const packDir = srcIntervals[index].first; WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir]) uint_t unpackSize = kernels_.redistributeSize(dstInterval); - WALBERLA_ASSERT_EQUAL(packSize, unpackSize) + WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize) #endif // TODO: This is a dirty workaround. Code-generate direct redistribution! - std::vector< unsigned char > buffer(packSize); + std::vector< unsigned char > buffer(kernels_.size(srcInterval)); kernels_.packAll(srcField, srcInterval, &buffer[0]); kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir); + + // kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir); } } @@ -228,20 +228,20 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm CellInterval srcInterval; srcField->getGhostRegion(dir, srcInterval, 2); - uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir); CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(), invDir, dstField); #ifndef NDEBUG uint_t unpackSize = kernels_.size(dstInterval, invDir); - WALBERLA_ASSERT_EQUAL(packSize, unpackSize) + WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize) #endif // TODO: This is a dirty workaround. Code-generate direct redistribution! - std::vector< unsigned char > buffer(packSize); + std::vector< unsigned char > buffer(kernels_.partialCoalescenceSize(srcInterval, dir)); kernels_.packPartialCoalescence(srcField, maskField, srcInterval, &buffer[0], dir); kernels_.unpackCoalescence(dstField, dstInterval, &buffer[0], invDir); + // kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir); } template< typename PdfField_T> diff --git a/src/lbm_generated/evaluation/PerformanceEvaluation.h b/src/lbm_generated/evaluation/PerformanceEvaluation.h index 36f112ac..98134e0d 100644 --- a/src/lbm_generated/evaluation/PerformanceEvaluation.h +++ b/src/lbm_generated/evaluation/PerformanceEvaluation.h @@ -92,6 +92,16 @@ class PerformanceEvaluationBase return c; } + uint64_t numberOfCells() const + { + return cells_.numberOfCells(); + } + + uint64_t numberOfFluidCells() const + { + return fluidCells_.numberOfCells(); + } + double mlups( const uint_t timeSteps, const double time ) const { double m( 0.0 ); diff --git a/src/lbm_generated/field/AddToStorage.h b/src/lbm_generated/field/AddToStorage.h index afb86819..95b0089d 100644 --- a/src/lbm_generated/field/AddToStorage.h +++ b/src/lbm_generated/field/AddToStorage.h @@ -41,7 +41,7 @@ class PdfFieldHandling : public field::BlockDataHandling< PdfField; PdfFieldHandling( const weak_ptr< StructuredBlockStorage > & blocks, const LatticeStorageSpecification_T & storageSpecification, - const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator > alloc = nullptr ) : + const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator > alloc = nullptr ) : blocks_( blocks ), storageSpecification_( storageSpecification ), nrOfGhostLayers_( nrOfGhostLayers ), layout_( layout ), alloc_( alloc ){} @@ -106,7 +106,7 @@ class PdfFieldHandling : public field::BlockDataHandling< PdfField > alloc_; + shared_ptr< field::FieldAllocator > alloc_; }; // class PdfFieldHandling @@ -121,10 +121,10 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c const field::Layout & layout = field::fzyx, const Set & requiredSelectors = Set::emptySet(), const Set & incompatibleSelectors = Set::emptySet(), - const shared_ptr< field::FieldAllocator > alloc = nullptr) + const shared_ptr< field::FieldAllocator > alloc = nullptr) { return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >( - blocks, storageSpecification, ghostLayers, layout, alloc ), + blocks, storageSpecification, ghostLayers, layout, alloc ), identifier, requiredSelectors, incompatibleSelectors ); } @@ -134,7 +134,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c const field::Layout & layout = field::fzyx, const Set & requiredSelectors = Set::emptySet(), const Set & incompatibleSelectors = Set::emptySet(), - const shared_ptr< field::FieldAllocator > alloc = nullptr) + const shared_ptr< field::FieldAllocator > alloc = nullptr) { auto ghostLayers = uint_c(1); @@ -148,7 +148,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c const LatticeStorageSpecification_T & storageSpecification, const Set & requiredSelectors = Set::emptySet(), const Set & incompatibleSelectors = Set::emptySet(), - const shared_ptr< field::FieldAllocator > alloc = nullptr) + const shared_ptr< field::FieldAllocator > alloc = nullptr) { auto ghostLayers = uint_c(1); auto layout = field::fzyx; @@ -161,7 +161,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c template< typename LatticeStorageSpecification_T, typename BlockStorage_T > BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier, const LatticeStorageSpecification_T & storageSpecification, - const shared_ptr< field::FieldAllocator > alloc = nullptr) + const shared_ptr< field::FieldAllocator > alloc = nullptr) { auto ghostLayers = uint_c(1); auto layout = field::fzyx; @@ -177,7 +177,7 @@ template< typename LatticeStorageSpecification_T, typename BlockStorage_T > BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier, const LatticeStorageSpecification_T & storageSpecification, const field::Layout & layout = field::fzyx, - const shared_ptr< field::FieldAllocator > alloc = nullptr) + const shared_ptr< field::FieldAllocator > alloc = nullptr) { auto ghostLayers = uint_c(1); auto requiredSelectors = Set::emptySet(); @@ -193,7 +193,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c const LatticeStorageSpecification_T & storageSpecification, const uint_t ghostLayers, const field::Layout & layout, - const shared_ptr< field::FieldAllocator > alloc) + const shared_ptr< field::FieldAllocator > alloc) { auto requiredSelectors = Set::emptySet(); auto incompatibleSelectors = Set::emptySet(); diff --git a/src/lbm_generated/field/PdfField.h b/src/lbm_generated/field/PdfField.h index 6e6b7ee8..2dce8976 100644 --- a/src/lbm_generated/field/PdfField.h +++ b/src/lbm_generated/field/PdfField.h @@ -28,7 +28,7 @@ namespace walberla::lbm_generated { template< typename LatticeStorageSpecification_T > -class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::Stencil::Size > +class PdfField : public GhostLayerField< typename LatticeStorageSpecification_T::value_type, LatticeStorageSpecification_T::Stencil::Size > { public: @@ -38,17 +38,17 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T:: using LatticeStorageSpecification = LatticeStorageSpecification_T; using Stencil = typename LatticeStorageSpecification_T::Stencil; - using value_type = typename GhostLayerField::value_type; + using value_type = typename LatticeStorageSpecification_T::value_type; - using Ptr = typename GhostLayerField::Ptr; - using ConstPtr = typename GhostLayerField::ConstPtr; + using Ptr = typename GhostLayerField::Ptr; + using ConstPtr = typename GhostLayerField::ConstPtr; //@} //******************************************************************************************************************* PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize, const LatticeStorageSpecification_T & storageSpecification, const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf, - const shared_ptr< field::FieldAllocator > & alloc = shared_ptr< field::FieldAllocator >() ); + const shared_ptr< field::FieldAllocator > & alloc = shared_ptr< field::FieldAllocator >() ); ~PdfField() override = default; @@ -61,19 +61,19 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T:: // Access functions (with stencil::Direction!) // ///////////////////////////////////////////////// - using GhostLayerField< real_t, Stencil::Size >::get; + using GhostLayerField< value_type, Stencil::Size >::get; - real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); } - const real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); } - real_t & get( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } - const real_t & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } + value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); } + const value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); } + value_type & get( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } + const value_type & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } - using GhostLayerField< real_t, Stencil::Size >::operator(); + using GhostLayerField< value_type, Stencil::Size >::operator(); - real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); } - const real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); } - real_t & operator()( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } - const real_t & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } + value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); } + const value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); } + value_type & operator()( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } + const value_type & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); } protected: @@ -81,7 +81,7 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T:: /*! \name Shallow Copy */ //@{ inline PdfField( const PdfField< LatticeStorageSpecification_T > & other ); - Field< real_t, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); } + Field< value_type, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); } //@} //******************************************************************************************************************* @@ -94,17 +94,17 @@ template< typename LatticeStorageSpecification_T > PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize, const LatticeStorageSpecification_T & storageSpecification, const uint_t ghostLayers, const field::Layout & _layout, - const shared_ptr< field::FieldAllocator > & alloc ) : + const shared_ptr< field::FieldAllocator > & alloc ) : - GhostLayerField< real_t, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ), + GhostLayerField< value_type, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ), storageSpecification_( storageSpecification ) { #ifdef _OPENMP // take care of proper thread<->memory assignment (first-touch allocation policy !) - this->setWithGhostLayer( real_t(0) ); + this->setWithGhostLayer( value_type(0) ); #endif - this->setWithGhostLayer( real_t(0) ); + this->setWithGhostLayer( value_type(0) ); } @@ -112,24 +112,24 @@ PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const template< typename LatticeStorageSpecification_T > inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::clone() const { - return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::clone() ); + return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::clone() ); } template< typename LatticeStorageSpecification_T > inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneUninitialized() const { - return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneUninitialized() ); + return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneUninitialized() ); } template< typename LatticeStorageSpecification_T > inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneShallowCopy() const { - return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneShallowCopy() ); + return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneShallowCopy() ); } template< typename LatticeStorageSpecification_T > inline PdfField< LatticeStorageSpecification_T >::PdfField( const PdfField< LatticeStorageSpecification_T > & other ) - : GhostLayerField< real_t, Stencil::Size >::GhostLayerField( other ) + : GhostLayerField< value_type, Stencil::Size >::GhostLayerField( other ) { } diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h index 8d95855e..ff963ed5 100644 --- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h +++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h @@ -33,6 +33,7 @@ namespace walberla { using gpu::communication::NonUniformGPUScheme; +using BlockFunction = std::function; // parameters: level namespace lbm_generated { @@ -67,26 +68,53 @@ class BasicRecursiveTimeStepGPU "Template parameter PdfField_T is of different type than BlockDataID pdfFieldId that is " "provided as constructor argument") #endif + useStreams_ = false; maxLevel_ = sbfs->getDepth(); + streams_.resize(maxLevel_ + 1); + timestepPerLevel_.resize(maxLevel_ + 1); - for (uint_t level = 0; level <= maxLevel_; level++) - { + for (uint_t level = 0; level <= maxLevel_; level++){ std::vector< Block* > blocks; sbfs->getBlocks(blocks, level); blocks_.push_back(blocks); + streams_[level].resize(nStreams_); + timestepPerLevel_[level] = uint8_c(0); + } + for (uint_t level = 0; level <= maxLevel_; level++){ + for (uint_t i = 0; i < nStreams_; i++){ + streams_[level][i] = nullptr; + } } }; - ~BasicRecursiveTimeStepGPU() = default; + ~BasicRecursiveTimeStepGPU(){ + if(useStreams_){ + for (uint_t level = 0; level <= maxLevel_; level++){ + for (uint_t i = 0; i < nStreams_; i++) + WALBERLA_GPU_CHECK(gpuStreamDestroy(streams_[level][i])) + } + } + } + + void activateStreams(){ + WALBERLA_LOG_INFO_ON_ROOT("Updating blocks using " << nStreams_ << " GPU Streams") + for (uint_t level = 0; level <= maxLevel_; level++){ + for (uint_t i = 0; i < nStreams_; i++) + WALBERLA_GPU_CHECK(gpuStreamCreate(&streams_[level][i])) + } + useStreams_ = true; + } + void operator()() { timestep(0); }; void addRefinementToTimeLoop(SweepTimeloop& timeloop, uint_t level = 0); - void test(uint_t maxLevel, uint_t level = 0); + void addPostBoundaryHandlingBlockFunction( const BlockFunction & function ); private: void timestep(uint_t level); void ghostLayerPropagation(Block* block, gpuStream_t gpuStream); std::function< void() > executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation = false); + std::function< void() > executePostBoundaryBlockFunctions(uint_t level); std::function< void() > executeBoundaryHandlingOnLevel(uint_t level); @@ -100,6 +128,12 @@ class BasicRecursiveTimeStepGPU SweepCollection_T& sweepCollection_; BoundaryCollection_T& boundaryCollection_; + std::vector< BlockFunction > globalPostBoundaryHandlingBlockFunctions_; + + std::vector< std::vector< gpuStream_t >> streams_; + uint_t nStreams_{uint_c(6)}; + bool useStreams_; + std::vector< uint8_t > timestepPerLevel_; }; } // namespace lbm_generated diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h index 6665cf35..0327b9b5 100644 --- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h +++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h @@ -28,18 +28,13 @@ namespace lbm_generated { template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::timestep(uint_t level) { - std::vector blocks; - sbfs_->getBlocks(blocks, level); - - uint_t maxLevel = sbfs_->getDepth(); - // 1.1 Collision - for(auto b: blocks){ + for(auto b: blocks_[level]){ sweepCollection_.streamCollide(b); } // 1.2 Recursive Descent - if(level < maxLevel){ + if(level < maxLevel_){ timestep(level + 1); } @@ -52,13 +47,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio commScheme_->communicateEqualLevel(level); // 1.5 Boundary Handling and Coalescence Preparation - for(auto b : blocks){ + for(auto b : blocks_[level]){ boundaryCollection_(b, nullptr); - if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b); + if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b); } // 1.6 Fine to Coarse Communication, receiving end - if(level < maxLevel){ + if(level < maxLevel_){ commScheme_->communicateFineToCoarse(level + 1); } @@ -67,13 +62,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio if(level == 0) return; // 2.1 Collision and Ghost-Layer Propagation - for(auto b: blocks){ + for(auto b: blocks_[level]){ ghostLayerPropagation(b); // GL-Propagation first without swapping arrays... sweepCollection_.streamCollide(b); // then Stream-Collide on interior, and swap arrays } // 2.2 Recursive Descent - if(level < maxLevel){ + if(level < maxLevel_){ timestep(level + 1); } @@ -81,13 +76,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio commScheme_->communicateEqualLevel(level); // 2.5 Boundary Handling and Coalescence Preparation - for(auto b : blocks){ + for(auto b : blocks_[level]){ boundaryCollection_(b, nullptr); - if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b); + if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b); } // 2.6 Fine to Coarse Communication, receiving end - if(level < maxLevel){ + if(level < maxLevel_){ commScheme_->communicateFineToCoarse(level + 1); } } @@ -115,6 +110,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio // 1.5 Boundary Handling and Coalescence Preparation timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level)); + timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level)); // 1.6 Fine to Coarse Communication, receiving end if(level < maxLevel_){ @@ -138,6 +134,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio // 2.5 Boundary Handling and Coalescence Preparation timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level)); + timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level)); // 2.6 Fine to Coarse Communication, receiving end if(level < maxLevel_) @@ -145,91 +142,65 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio } -template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > -void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::test(uint_t maxLevel, uint_t level) -{ - // 1.1 Collision - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide on level " + std::to_string(level)); - - // 1.2 Recursive Descent - if(level < maxLevel){ - test(maxLevel, level + 1); - } - - // 1.3 Coarse to Fine Communication, receiving end - if(level != 0){ - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate coarse to fine on level " + std::to_string(level)); - } - - // 1.4 Equal-Level Communication - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level)); - - - // 1.5 Boundary Handling and Coalescence Preparation - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level)); - - // 1.6 Fine to Coarse Communication, receiving end - if(level < maxLevel){ - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1)); - } - - // Stop here if on coarsest level. - // Otherwise, continue to second subcycle. - if(level == 0) return; - - // 2.1 Collision and Ghost-Layer Propagation - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level)); - - // 2.2 Recursive Descent - if(level < maxLevel) - test(maxLevel, level + 1); - - - // 2.4 Equal-Level Communication - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level)); - - // 2.5 Boundary Handling and Coalescence Preparation - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level)); - - // 2.6 Fine to Coarse Communication, receiving end - if(level < maxLevel) - WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1)); - -} - template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > std::function BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation) { - return [level, withGhostLayerPropagation, this]() - { - if (withGhostLayerPropagation) - { - for(auto b: blocks_[level]){ - ghostLayerPropagation(b, nullptr); - sweepCollection_.streamCollide(b, 0, nullptr); + if(sweepCollection_.blockWise()){ + return [level, withGhostLayerPropagation, this](){ + if (withGhostLayerPropagation){ + const uint8_t timestepPlusOne = (timestepPerLevel_[level] + 1) & 1; + sweepCollection_.ghostLayerPropagation(level, timestepPlusOne); + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) + timestepPerLevel_[level] = (timestepPerLevel_[level] + 1) & 1; + sweepCollection_.streamCollideOverBlocks(level, timestepPerLevel_[level]); + for (uint_t i = 0; i < blocks_[level].size(); i++){ + auto pdfs = blocks_[level][i]->getData< PdfField_T >(pdfFieldId_); + pdfs->advanceTimestep(); + } + commScheme_->setTimestepForLevel(level, timestepPerLevel_[level]); + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) } - } - else - { - for(auto b: blocks_[level]){ - sweepCollection_.streamCollide(b, 0, nullptr); + else{ + timestepPerLevel_[level] = (timestepPerLevel_[level] + 1) & 1; + sweepCollection_.streamCollideOverBlocks(level, timestepPerLevel_[level]); + for (uint_t i = 0; i < blocks_[level].size(); i++){ + auto pdfs = blocks_[level][i]->getData< PdfField_T >(pdfFieldId_); + pdfs->advanceTimestep(); + } + commScheme_->setTimestepForLevel(level, timestepPerLevel_[level]); + WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) + WALBERLA_GPU_CHECK(gpuPeekAtLastError()) } - } - WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) - }; + }; + } + else{ + return [level, withGhostLayerPropagation, this](){ + if (withGhostLayerPropagation){ + for (uint_t i = 0; i < blocks_[level].size(); i++){ + ghostLayerPropagation(blocks_[level][i], streams_[level][i % nStreams_]); + sweepCollection_.streamCollide(blocks_[level][i], 0, streams_[level][i % nStreams_]); + } + } + else{ + for (uint_t i = 0; i < blocks_[level].size(); i++){ + sweepCollection_.streamCollide(blocks_[level][i], 0, streams_[level][i % nStreams_]); + } + } + }; + } } template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > std::function BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeBoundaryHandlingOnLevel(uint_t level) { return [this, level]() { - for (auto b : blocks_[level]) - { - boundaryCollection_(b, nullptr); - if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b, nullptr); + for (uint_t i = 0; i < blocks_[level].size(); i++){ + boundaryCollection_(blocks_[level][i], streams_[level][i % nStreams_]); + if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(blocks_[level][i], streams_[level][i % nStreams_]); } - WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) }; } @@ -251,5 +222,23 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio } } +template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > +std::function BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executePostBoundaryBlockFunctions(uint_t level) +{ + return [this, level]() { + for( const auto& func : globalPostBoundaryHandlingBlockFunctions_ ){ + func(level); + } + }; +} + + +template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T > +inline void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::addPostBoundaryHandlingBlockFunction( const BlockFunction & function ) +{ + globalPostBoundaryHandlingBlockFunctions_.emplace_back( function ); +} + + } // namespace lbm_generated } // namespace walberla diff --git a/src/lbm_generated/gpu/GPUPdfField.h b/src/lbm_generated/gpu/GPUPdfField.h index 1a9f59a1..f67a84fe 100644 --- a/src/lbm_generated/gpu/GPUPdfField.h +++ b/src/lbm_generated/gpu/GPUPdfField.h @@ -28,7 +28,7 @@ using namespace walberla::gpu; namespace walberla::lbm_generated { template< typename LatticeStorageSpecification_T > -class GPUPdfField : public GPUField< real_t > +class GPUPdfField : public GPUField< typename LatticeStorageSpecification_T::value_type > { public: @@ -38,7 +38,7 @@ class GPUPdfField : public GPUField< real_t > using LatticeStorageSpecification = LatticeStorageSpecification_T; using Stencil = typename LatticeStorageSpecification_T::Stencil; - using value_type = typename GPUField::value_type; + using value_type = typename LatticeStorageSpecification_T::value_type; //@} //******************************************************************************************************************* @@ -59,7 +59,7 @@ template< typename LatticeStorageSpecification_T > GPUPdfField< LatticeStorageSpecification_T >::GPUPdfField( uint_t _xSize, uint_t _ySize, uint_t _zSize, const LatticeStorageSpecification_T & storageSpecification, uint_t ghostLayers, const Layout & layout, bool usePitchedMem) : - GPUField< real_t>( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification ) + GPUField( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification ) { } diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h index 585d1db3..a780bf48 100644 --- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h +++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h @@ -42,6 +42,7 @@ template< typename PdfField_T, bool inplace > class NonuniformGPUPackingKernelsWrapper { public: + using value_type = typename PdfField_T::value_type; void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream ) const = 0; void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream ) const = 0; void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, @@ -49,8 +50,15 @@ class NonuniformGPUPackingKernelsWrapper void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream ) const = 0; void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream ) const = 0; - void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, - CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0; + void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0; + void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array& sizes, std::array& strides) const = 0; + + + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0; + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0; void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, stencil::Direction dir, gpuStream_t stream ) const = 0; @@ -64,6 +72,8 @@ class NonuniformGPUPackingKernelsWrapper uint_t size(CellInterval ci) const = 0; uint_t redistributeSize(CellInterval ci) const = 0; uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const = 0; + + bool blockWise() const = 0; }; /* @@ -75,6 +85,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false > public: using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification; using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels; + using value_type = typename PdfField_T::value_type; void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const { @@ -108,6 +119,23 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false > kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, stream); } + void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t /*timestep*/, gpuStream_t stream, std::array& sizes, std::array& strides) const + { + kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, stream, sizes, strides); + } + + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir, gpuStream_t stream) const + { + kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, stream); + } + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const + { + kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream); + } + void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const { @@ -138,6 +166,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false > return kernels_.partialCoalescenceSize(ci, dir); } + bool blockWise() const {return kernels_.blockWise;} + private: PackingKernels_T kernels_; }; @@ -151,6 +181,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true > public: using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification; using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels; + using value_type = typename PdfField_T::value_type; void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const { @@ -192,10 +223,39 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true > kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream); } + void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array& sizes, std::array& strides) const + { + kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, timestep, stream, sizes, strides); + } + + + void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, + CellInterval dstInterval, Direction dir, gpuStream_t stream) const + { + uint8_t timestep = srcField->getTimestep(); + WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must " + "be on an odd timestep, while the source field could either be " + "on an even or an odd state.") + kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream); + } + + void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval, + PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const + { + uint8_t timestep = dstField->getTimestep(); + WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " + "be on an even timestep, while the source field could either be " + "on an even or an odd state.") + kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep, stream); + } + void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, stencil::Direction dir, gpuStream_t stream = nullptr) const { uint8_t timestep = dstField->getTimestep(); + WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the course to fine step is executed, the fine Field must " + "be on an odd timestep, while the source field could either be " + "on an even or an odd state.") kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep, stream); } @@ -203,6 +263,9 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true > unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const { uint8_t timestep = srcField->getTimestep(); + WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must " + "be on an even timestep, while the source field could either be " + "on an even or an odd state.") kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep, stream); } @@ -226,6 +289,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true > return kernels_.partialCoalescenceSize(ci, dir); } + bool blockWise() const {return kernels_.blockWise;} + private: PackingKernels_T kernels_; }; @@ -243,17 +308,52 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif using Stencil = typename LatticeStorageSpecification_T::Stencil; using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil; using CommData_T = NonuniformGPUCommData< LatticeStorageSpecification_T >; + using value_type = typename PdfField_T::value_type; + + NonuniformGeneratedGPUPdfPackInfo(const uint64_t meshLevels, const BlockDataID pdfFieldID, const BlockDataID commDataID) + : pdfFieldID_(pdfFieldID), commDataID_(commDataID){ init(meshLevels); }; + + void init(const uint64_t meshLevels){ + auto size = meshLevels * Stencil::Q; + equalCommSRC.resize(size); + equalCommDST.resize(size); + equalCommSRCGPU.resize(size); + equalCommDSTGPU.resize(size); - NonuniformGeneratedGPUPdfPackInfo(const BlockDataID pdfFieldID, const BlockDataID commDataID) - : pdfFieldID_(pdfFieldID), commDataID_(commDataID){}; + } + + void sync() override { + for (uint_t i = 0; i < equalCommSRC.size(); i++){ + for (auto const& x : equalCommSRC[i]){ + auto key = x.first; + WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommSRCGPU[i][key], sizeof(value_type*) * equalCommSRC[i][key].size())); + WALBERLA_GPU_CHECK(gpuMemcpy(equalCommSRCGPU[i][key], &equalCommSRC[i][key][0],sizeof(value_type*) * equalCommSRC[i][key].size(), gpuMemcpyHostToDevice)); + + WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommDSTGPU[i][key], sizeof(value_type*) * equalCommDST[i][key].size())); + WALBERLA_GPU_CHECK(gpuMemcpy(equalCommDSTGPU[i][key], &equalCommDST[i][key][0],sizeof(value_type*) * equalCommDST[i][key].size(), gpuMemcpyHostToDevice)); + + } + } + } + + ~NonuniformGeneratedGPUPdfPackInfo() { + for (uint_t i = 0; i < equalCommSRC.size(); i++){ + for (auto const& x : equalCommSRC[i]){ + auto key = x.first; + WALBERLA_GPU_CHECK(gpuFree(equalCommSRCGPU[i][key])) + WALBERLA_GPU_CHECK(gpuFree(equalCommDSTGPU[i][key])) + } + } + } bool constantDataExchange() const override { return true; }; bool threadsafeReceiving() const override { return false; }; /// Equal Level void unpackDataEqualLevel(Block* receiver, Direction dir, GpuBuffer_T& buffer, gpuStream_t stream) override; - void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir, - gpuStream_t stream) override; + void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) override; + void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) override; + void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream) override; /// Coarse to Fine void unpackDataCoarseToFine(Block* fineReceiver, const BlockID& coarseSender, stencil::Direction dir, @@ -291,7 +391,7 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif bool areNeighborsInDirection(const Block* block, const BlockID& neighborID, Vector3< cell_idx_t > dirVec) const; - CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > dirVec, + CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > tangentialDir, cell_idx_t width) const; bool skipsThroughCoarseBlock(const Block* block, Direction dir) const; @@ -306,6 +406,14 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif const BlockDataID pdfFieldID_; internal::NonuniformGPUPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_; + std::array strides; + + std::vector,std::vector>> equalCommSRC; + std::vector,std::vector>> equalCommDST; + + std::vector,value_type **>> equalCommSRCGPU; + std::vector,value_type **>> equalCommDSTGPU; + public: const BlockDataID commDataID_; }; diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h index 7ff9c7fd..3f0b0ad5 100644 --- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h +++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h @@ -55,7 +55,7 @@ std::shared_ptr< NonuniformGeneratedGPUPdfPackInfo< PdfField_T > > auto handling = std::make_shared >(blocks); BlockDataID commDataID = sbf->addBlockData(handling, dataIdentifier); - return std::make_shared >(pdfFieldID, commDataID); + return std::make_shared >(sbf->getNumberOfLevels(), pdfFieldID, commDataID); } @@ -81,6 +81,9 @@ template< typename PdfField_T> void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel( const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream) { + if(kernels_.blockWise()) + return; + auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_); auto dstField = receiver->getData< PdfField_T >(pdfFieldID_); @@ -92,6 +95,57 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, stream); } +template< typename PdfField_T> +void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::addForLocalEqualLevelComm( + const Block* sender, Block* receiver, stencil::Direction dir) +{ + if(!kernels_.blockWise()) + return; + const uint_t level = sender->getLevel(); + auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_); + auto dstField = receiver->getData< PdfField_T >(pdfFieldID_); + + CellInterval srcRegion; + CellInterval dstRegion; + cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1; + srcField->getSliceBeforeGhostLayer(dir, srcRegion, gls, false); + dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, gls, false); + + strides[0] = int64_t(srcField->xStride()); + strides[1] = int64_t(srcField->yStride()); + strides[2] = int64_t(srcField->zStride()); + strides[3] = int64_t(1 * int64_t(srcField->fStride())); + + value_type* data_pdfs_dst = dstField->dataAt(dstRegion.xMin(), dstRegion.yMin(), dstRegion.zMin(), 0); + value_type* data_pdfs_src = srcField->dataAt(srcRegion.xMin(), srcRegion.yMin(), srcRegion.zMin(), 0); + + const uint_t index = level * Stencil::Q + dir; + Vector3 size(int64_c(srcRegion.xSize()), int64_c(srcRegion.ySize()), int64_c(srcRegion.zSize())); + + equalCommDST[index][size].emplace_back(data_pdfs_dst); + equalCommSRC[index][size].emplace_back(data_pdfs_src); +} + + +template< typename PdfField_T> +void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) +{ + if(!kernels_.blockWise()) + return; + + for (auto dir = CommunicationStencil::beginNoCenter(); dir != CommunicationStencil::end(); ++dir){ + const uint_t index = level * Stencil::Q + *dir; + for (auto const& x : equalCommSRC[index]){ + auto key = x.first; + value_type** data_pdfs_src_dp = equalCommSRCGPU[index][key]; + value_type** data_pdfs_dst_dp = equalCommDSTGPU[index][key]; + std::array< int64_t, 4 > size = { int64_c(equalCommSRC[index][key].size()), key[0], key[1], key[2] }; + + kernels_.blockLocalCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, *dir, timestep, stream, size, strides); + } + } +} + template< typename PdfField_T> void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataEqualLevelImpl( @@ -168,21 +222,13 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFi Direction const unpackDir = dstIntervals[index].first; CellInterval dstInterval = dstIntervals[index].second; - uint_t packSize = kernels_.size(srcInterval); - #ifndef NDEBUG Direction const packDir = srcIntervals[index].first; WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir]) uint_t unpackSize = kernels_.redistributeSize(dstInterval); - WALBERLA_ASSERT_EQUAL(packSize, unpackSize) + WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize) #endif - - // TODO: This is a dirty workaround. Code-generate direct redistribution! - unsigned char *buffer; - WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize)) - kernels_.packAll(srcField, srcInterval, buffer, stream); - kernels_.unpackRedistribute(dstField, dstInterval, buffer, unpackDir, stream); - WALBERLA_GPU_CHECK(gpuFree(buffer)) + kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir, stream); } } @@ -190,6 +236,9 @@ template< typename PdfField_T> void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine( const Block* coarseSender, Block* fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) { + // WARNING: This function uses an inplace buffer array. + // If possible the direct communicateLocalCoarseToFine without buffer array should be used + auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_); auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_); @@ -269,22 +318,16 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalFineToCoar CellInterval srcInterval; srcField->getGhostRegion(dir, srcInterval, 2); - uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir); CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(), invDir, dstField); #ifndef NDEBUG uint_t unpackSize = kernels_.size(dstInterval, invDir); - WALBERLA_ASSERT_EQUAL(packSize, unpackSize) + WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize) #endif - // TODO: This is a dirty workaround. Code-generate direct redistribution! - unsigned char *buffer; - WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize)) - kernels_.packPartialCoalescence(srcField, maskField, srcInterval, buffer, dir, stream); - kernels_.unpackCoalescence(dstField, dstInterval, buffer, invDir, stream); - WALBERLA_GPU_CHECK(gpuFree(buffer)) + kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream); } @@ -425,7 +468,7 @@ inline Vector3< cell_idx_t > } /** - * Returns the part of a cell interval's hull of given \p width in direction \p dirVec. + * Returns the part of a cell interval's hull of given width in direction dirVec. * @param ci The original cell interval * @param dirVec Direction Vector * @param width Width of the hull diff --git a/src/lbm_generated/refinement/RefinementScaling.h b/src/lbm_generated/refinement/RefinementScaling.h index f8015946..abee51e7 100644 --- a/src/lbm_generated/refinement/RefinementScaling.h +++ b/src/lbm_generated/refinement/RefinementScaling.h @@ -20,44 +20,18 @@ #pragma once -#include "blockforest/BlockDataHandling.h" +#include "core/DataTypes.h" -#include "domain_decomposition/IBlock.h" -#include "domain_decomposition/StructuredBlockStorage.h" - -namespace walberla -{ -namespace lbm_generated +namespace walberla::lbm_generated { -class DefaultRefinementScaling : public blockforest::AlwaysInitializeBlockDataHandling< real_t > +inline real_t relaxationRateScaling( real_t relaxationRate, uint_t refinementLevel ) { - public: - DefaultRefinementScaling(const weak_ptr< StructuredBlockStorage >& blocks, const real_t parameter) - : blocks_(blocks), parameter_(parameter){}; - - real_t* initialize(IBlock* const block) override - { - WALBERLA_ASSERT_NOT_NULLPTR(block) - auto blocks = blocks_.lock(); - WALBERLA_CHECK_NOT_NULLPTR(blocks) - - level_ = block->getBlockStorage().getLevel(*block); - - const real_t level_scale_factor = real_c(uint_t(1) << level_); - const real_t one = real_c(1.0); - const real_t half = real_c(0.5); - - return new real_t(parameter_ / (level_scale_factor * (-parameter_ * half + one) + parameter_ * half)); - } - bool operator==(const DefaultRefinementScaling& other) const { return level_ == other.level_; } - - private: - const weak_ptr< StructuredBlockStorage > blocks_; - const real_t parameter_; + const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); + const real_t one = real_c(1.0); + const real_t half = real_c(0.5); - uint_t level_; -}; + return real_c(relaxationRate / (levelScaleFactor * (-relaxationRate * half + one) + relaxationRate * half)); +} -} // namespace lbm_generated -} // namespace walberla \ No newline at end of file +} // namespace walberla::lbm_generated \ No newline at end of file diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp index f36797ee..8a080f86 100644 --- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp +++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp @@ -27,257 +27,108 @@ # pragma GCC diagnostic ignored "-Wunused-variable" #endif -/************************************************************************************* +namespace walberla { +namespace lbm { + + /************************************************************************************* * Kernel Definitions *************************************************************************************/ -namespace internal_d3q19storagespecification_pack_ALL { + namespace internal_d3q19storagespecification_pack_ALL { static FUNC_PREFIX void d3q19storagespecification_pack_ALL(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_unpack_ALL { -static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) + namespace internal_d3q19storagespecification_unpack_ALL { +static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2]; - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 1]; - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 2]; - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 3]; - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 4]; - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 5]; - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 6]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 7]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 8]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 9]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 10]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 11]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 12]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 13]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 14]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 15]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 16]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 17]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 18]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 9]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 10]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 11]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 12]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 13]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 14]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 15]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 16]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 17]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 18]; } } } } } -namespace internal_d3q19storagespecification_localCopy_ALL { + namespace internal_d3q19storagespecification_localCopy_ALL { static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; - double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0; - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; - double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30; - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } @@ -285,36 +136,36 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRIC } -namespace internal_d3q19storagespecification_pack_TE { -static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) + namespace internal_d3q19storagespecification_pack_B { +static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_SW { -static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_BW { +static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; } } } @@ -324,81 +175,65 @@ static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _da namespace internal_d3q19storagespecification_pack_T { static FUNC_PREFIX void d3q19storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_BS { -static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_NW { +static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_TN { -static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_SW { +static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_BW { -static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_BS { +static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; } } } @@ -408,809 +243,617 @@ static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _da namespace internal_d3q19storagespecification_pack_N { static FUNC_PREFIX void d3q19storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_E { -static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_BE { +static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_NW { -static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_W { +static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_NE { -static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_TS { +static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_TW { -static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_E { +static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_BE { -static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_SE { +static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_W { -static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_BN { +static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_S { -static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_TN { +static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_SE { -static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_NE { +static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_TS { -static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_TW { +static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_BN { -static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_TE { +static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_pack_B { -static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_pack_S { +static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_unpack_BW { -static FUNC_PREFIX void d3q19storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) + namespace internal_d3q19storagespecification_unpack_N { +static FUNC_PREFIX void d3q19storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_unpack_N { -static FUNC_PREFIX void d3q19storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_TW { +static FUNC_PREFIX void d3q19storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_SE { -static FUNC_PREFIX void d3q19storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_NW { +static FUNC_PREFIX void d3q19storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_TE { -static FUNC_PREFIX void d3q19storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_S { +static FUNC_PREFIX void d3q19storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_unpack_T { -static FUNC_PREFIX void d3q19storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_BS { +static FUNC_PREFIX void d3q19storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_TS { -static FUNC_PREFIX void d3q19storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_NE { +static FUNC_PREFIX void d3q19storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_BE { -static FUNC_PREFIX void d3q19storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_BW { +static FUNC_PREFIX void d3q19storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_NW { -static FUNC_PREFIX void d3q19storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_BN { +static FUNC_PREFIX void d3q19storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_NE { -static FUNC_PREFIX void d3q19storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_TN { +static FUNC_PREFIX void d3q19storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_BS { -static FUNC_PREFIX void d3q19storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_W { +static FUNC_PREFIX void d3q19storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_unpack_E { -static FUNC_PREFIX void d3q19storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_SW { +static FUNC_PREFIX void d3q19storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_S { -static FUNC_PREFIX void d3q19storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_SE { +static FUNC_PREFIX void d3q19storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_W { -static FUNC_PREFIX void d3q19storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_E { +static FUNC_PREFIX void d3q19storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_unpack_SW { -static FUNC_PREFIX void d3q19storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_TS { +static FUNC_PREFIX void d3q19storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_B { -static FUNC_PREFIX void d3q19storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_BE { +static FUNC_PREFIX void d3q19storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_TN { -static FUNC_PREFIX void d3q19storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_B { +static FUNC_PREFIX void d3q19storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_unpack_BN { -static FUNC_PREFIX void d3q19storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_TE { +static FUNC_PREFIX void d3q19storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q19storagespecification_unpack_TW { -static FUNC_PREFIX void d3q19storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q19storagespecification_unpack_T { +static FUNC_PREFIX void d3q19storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4]; } } } } } -namespace internal_d3q19storagespecification_localCopy_NE { -static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) + namespace internal_d3q19storagespecification_localCopy_BE { +static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_TS { -static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_NE { +static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_BE { -static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_TE { +static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_BS { -static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_N { +static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_BW { -static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_NW { +static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_T { -static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_BS { +static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; } } } @@ -1220,337 +863,209 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT namespace internal_d3q19storagespecification_localCopy_TN { static FUNC_PREFIX void d3q19storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_W { -static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_SE { +static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_E { -static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_B { +static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_TW { -static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_W { +static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_SW { -static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_TS { +static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_NW { -static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_TW { +static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_BN { -static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_BW { +static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_TE { -static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_SW { +static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_B { -static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_BN { +static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_N { -static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_E { +static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_S { -static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_T { +static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q19storagespecification_localCopy_SE { -static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q19storagespecification_localCopy_S { +static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; } } } @@ -1558,15 +1073,12 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT } + - -/************************************************************************************* + /************************************************************************************* * Kernel Wrappers *************************************************************************************/ -namespace walberla { -namespace lbm { - void D3Q19StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const { double * buffer = reinterpret_cast(outBuffer); diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h index 7c2fb9e8..7777f1d5 100644 --- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h +++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h @@ -68,9 +68,35 @@ class D3Q19StorageSpecification // If true the background deviation (rho_0 = 1) is subtracted for the collision step. static const bool zeroCenteredPDFs = true; // Lattice weights - static constexpr double w[19] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 }; + static constexpr double w[19] = { double(0.333333333333333), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778) }; // Inverse lattice weights - static constexpr double wInv[19] = { 3.00000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000 }; + static constexpr double wInv[19] = { double(3.00000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000) }; + + struct AccessorEVEN + { + static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 }; + static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 }; + static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 }; + static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }; + + static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }; + }; + + struct AccessorODD + { + static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 }; + static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 }; + static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 }; + static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }; + + static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 }; + }; // Compute kernels to pack and unpack MPI buffers class PackKernels { @@ -84,8 +110,8 @@ class D3Q19StorageSpecification static const bool inplace = false; /** - * Packs all pdfs from the given cell interval to the send buffer. - * */ + * Packs all pdfs from the given cell interval to the send buffer. + * */ void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const; /** @@ -124,7 +150,7 @@ class D3Q19StorageSpecification * @return The required size of the buffer, in bytes * */ uint_t size (CellInterval & ci, stencil::Direction dir) const { - return ci.numCells() * sizes[dir] * sizeof(value_type); + return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type)); } /** @@ -134,7 +160,7 @@ class D3Q19StorageSpecification * @return The required size of the buffer, in bytes * */ uint_t size (CellInterval & ci) const { - return ci.numCells() * 19 * sizeof(value_type); + return ci.numCells() * 19 * uint_c(sizeof(value_type)); } @@ -143,6 +169,8 @@ class D3Q19StorageSpecification const uint_t sizes[27] { 0, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; }; + using value_type = PackKernels::value_type; + }; }} //lbm/walberla \ No newline at end of file diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp index 3ecdf889..dce3446a 100644 --- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp +++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp @@ -27,345 +27,132 @@ # pragma GCC diagnostic ignored "-Wunused-variable" #endif -/************************************************************************************* +namespace walberla { +namespace lbm { + + /************************************************************************************* * Kernel Definitions *************************************************************************************/ -namespace internal_d3q27storagespecification_pack_ALL { + namespace internal_d3q27storagespecification_pack_ALL { static FUNC_PREFIX void d3q27storagespecification_pack_ALL(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) - { - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 19] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 20] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 21] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 22] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 23] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 24] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 25] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 26] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; - } - } - } -} -} - -namespace internal_d3q27storagespecification_unpack_ALL { -static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + { + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 19] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 20] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 21] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 22] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 23] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 24] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 25] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 26] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; + } + } + } +} +} + + namespace internal_d3q27storagespecification_unpack_ALL { +static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2]; - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 1]; - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 2]; - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 3]; - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 4]; - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 5]; - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 6]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 7]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 8]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 9]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 10]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 11]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 12]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 13]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 14]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 15]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 16]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 17]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 18]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 19]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 20]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 21]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 22]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 23]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 24]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 25]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 26]; - } - } - } -} -} - -namespace internal_d3q27storagespecification_localCopy_ALL { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 9]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 10]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 11]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 12]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 13]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 14]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 15]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 16]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 17]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 18]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 19]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 20]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 21]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 22]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 23]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 24]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 25]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 26]; + } + } + } +} +} + + namespace internal_d3q27storagespecification_localCopy_ALL { static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0; - double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0; - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30; - double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30; - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } @@ -373,276 +160,200 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRIC } -namespace internal_d3q27storagespecification_pack_T { -static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) + namespace internal_d3q27storagespecification_pack_BE { +static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BN { -static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_SE { +static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_NE { -static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BW { +static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BNE { -static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BSW { +static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_SE { -static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_N { +static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TNW { -static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_SW { +static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_W { -static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_T { +static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TE { -static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_B { +static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_N { -static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TNW { +static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BSW { -static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_W { +static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } @@ -652,165 +363,123 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _d namespace internal_d3q27storagespecification_pack_TSW { static FUNC_PREFIX void d3q27storagespecification_pack_TSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BE { -static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_E { +static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_B { -static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BSE { +static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TNE { -static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TN { +static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TS { -static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BNW { +static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TN { -static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TS { +static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BNW { -static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_NW { +static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } @@ -820,105 +489,71 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _d namespace internal_d3q27storagespecification_pack_TW { static FUNC_PREFIX void d3q27storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BSE { -static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_S { +static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_NW { -static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TNE { +static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_S { -static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TSE { +static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; } } } @@ -928,1693 +563,1061 @@ static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _dat namespace internal_d3q27storagespecification_pack_BS { static FUNC_PREFIX void d3q27storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_TSE { -static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_TE { +static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_SW { -static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BN { +static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_BW { -static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_NE { +static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_pack_E { -static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_pack_BNE { +static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1) { - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_unpack_TSE { -static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) + namespace internal_d3q27storagespecification_unpack_SE { +static FUNC_PREFIX void d3q27storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_T { -static FUNC_PREFIX void d3q27storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_BS { +static FUNC_PREFIX void d3q27storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_TN { -static FUNC_PREFIX void d3q27storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BNW { +static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_SW { -static FUNC_PREFIX void d3q27storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BSW { +static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_TNE { -static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_W { +static FUNC_PREFIX void d3q27storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_BN { -static FUNC_PREFIX void d3q27storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_TN { +static FUNC_PREFIX void d3q27storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_W { -static FUNC_PREFIX void d3q27storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_E { +static FUNC_PREFIX void d3q27storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_E { -static FUNC_PREFIX void d3q27storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_TS { +static FUNC_PREFIX void d3q27storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_BNE { -static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_TW { +static FUNC_PREFIX void d3q27storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_TNW { -static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_NE { +static FUNC_PREFIX void d3q27storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_BSE { -static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_NW { +static FUNC_PREFIX void d3q27storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_BSW { -static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_TE { +static FUNC_PREFIX void d3q27storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_SE { -static FUNC_PREFIX void d3q27storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_S { +static FUNC_PREFIX void d3q27storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_N { -static FUNC_PREFIX void d3q27storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_B { +static FUNC_PREFIX void d3q27storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_NE { -static FUNC_PREFIX void d3q27storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BN { +static FUNC_PREFIX void d3q27storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_TE { -static FUNC_PREFIX void d3q27storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_TSE { +static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_B { -static FUNC_PREFIX void d3q27storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_TNE { +static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_NW { -static FUNC_PREFIX void d3q27storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BW { +static FUNC_PREFIX void d3q27storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_S { -static FUNC_PREFIX void d3q27storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) -{ - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; +namespace internal_d3q27storagespecification_unpack_TNW { +static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +{ + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_TSW { -static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BNE { +static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_BE { -static FUNC_PREFIX void d3q27storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BSE { +static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_unpack_BS { -static FUNC_PREFIX void d3q27storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_N { +static FUNC_PREFIX void d3q27storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_BW { -static FUNC_PREFIX void d3q27storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_BE { +static FUNC_PREFIX void d3q27storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_TS { -static FUNC_PREFIX void d3q27storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_SW { +static FUNC_PREFIX void d3q27storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2]; } } } } } -namespace internal_d3q27storagespecification_unpack_BNW { -static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_T { +static FUNC_PREFIX void d3q27storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8]; } } } } } -namespace internal_d3q27storagespecification_unpack_TW { -static FUNC_PREFIX void d3q27storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) +namespace internal_d3q27storagespecification_unpack_TSW { +static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0]; } } } } } -namespace internal_d3q27storagespecification_localCopy_SE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) + namespace internal_d3q27storagespecification_localCopy_TNE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TS { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_S { +static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BNW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_E { +static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TSW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_SW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TNE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TS { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BS { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BSE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_W { -static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BNE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33; - double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TSE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TSW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_NE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_SE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_B { -static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_W { +static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36; - double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TNW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TSE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_NW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BN { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_B { +static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_N { +static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317; - double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_SW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_NW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_T { -static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TNW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35; - double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313; - double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BSW { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_NE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_S { -static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BN { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32; - double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32; - double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39; - double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312; - double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312; - double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316; - double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322; - double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326; - double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_TN { -static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_T { +static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_E { -static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BNW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34; - double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310; - double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310; - double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314; - double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314; - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321; - double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_N { -static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BS { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) - { - double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3; + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + { for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31; - double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31; - double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37; - double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37; - double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38; - double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38; - double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311; - double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311; - double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315; - double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315; - double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319; - double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319; - double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320; - double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324; - double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) - { - _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2]; + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + { + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BSE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BSW { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_TN { +static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; - double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318; - double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318; - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325; - double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; - _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3]; } } } } } -namespace internal_d3q27storagespecification_localCopy_BNE { -static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) +namespace internal_d3q27storagespecification_localCopy_BE { +static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3) { - for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3; - double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323; - double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323; - for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1) + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1) { - _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3]; + _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3]; } } } @@ -2622,15 +1625,12 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRIC } + - -/************************************************************************************* + /************************************************************************************* * Kernel Wrappers *************************************************************************************/ -namespace walberla { -namespace lbm { - void D3Q27StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const { double * buffer = reinterpret_cast(outBuffer); diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h index 42599878..c765ef8a 100644 --- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h +++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h @@ -68,9 +68,35 @@ class D3Q27StorageSpecification // If true the background deviation (rho_0 = 1) is subtracted for the collision step. static const bool zeroCenteredPDFs = true; // Lattice weights - static constexpr double w[27] = { 0.296296296296296,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963 }; + static constexpr double w[27] = { double(0.296296296296296), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963) }; // Inverse lattice weights - static constexpr double wInv[27] = { 3.37500000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000 }; + static constexpr double wInv[27] = { double(3.37500000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000) }; + + struct AccessorEVEN + { + static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 }; + static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 }; + static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 }; + static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }; + + static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }; + }; + + struct AccessorODD + { + static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 }; + static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 }; + static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 }; + static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }; + + static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 }; + }; // Compute kernels to pack and unpack MPI buffers class PackKernels { @@ -84,8 +110,8 @@ class D3Q27StorageSpecification static const bool inplace = false; /** - * Packs all pdfs from the given cell interval to the send buffer. - * */ + * Packs all pdfs from the given cell interval to the send buffer. + * */ void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const; /** @@ -124,7 +150,7 @@ class D3Q27StorageSpecification * @return The required size of the buffer, in bytes * */ uint_t size (CellInterval & ci, stencil::Direction dir) const { - return ci.numCells() * sizes[dir] * sizeof(value_type); + return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type)); } /** @@ -134,7 +160,7 @@ class D3Q27StorageSpecification * @return The required size of the buffer, in bytes * */ uint_t size (CellInterval & ci) const { - return ci.numCells() * 27 * sizeof(value_type); + return ci.numCells() * 27 * uint_c(sizeof(value_type)); } @@ -143,6 +169,8 @@ class D3Q27StorageSpecification const uint_t sizes[27] { 0, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1 }; }; + using value_type = PackKernels::value_type; + }; }} //lbm/walberla \ No newline at end of file diff --git a/src/lbm_generated/storage_specification/storage_specification_generation_script.py b/src/lbm_generated/storage_specification/storage_specification_generation_script.py index d7432ee7..42dcac5f 100644 --- a/src/lbm_generated/storage_specification/storage_specification_generation_script.py +++ b/src/lbm_generated/storage_specification/storage_specification_generation_script.py @@ -3,7 +3,7 @@ from pystencils import Target from lbmpy.creationfunctions import create_lb_method -from lbmpy import LBMConfig, Stencil, Method, LBStencil +from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil from pystencils_walberla import ManualCodeGenerationContext, generate_info_header from lbmpy_walberla.storage_specification import generate_lbm_storage_specification @@ -22,11 +22,12 @@ lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate, streaming_pattern=streaming_pattern) + lbm_opt = LBMOptimisation() lb_method = create_lb_method(lbm_config=lbm_config) storage_spec_name = f'{stencil.name}StorageSpecification' - generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config, + generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config, lbm_opt, nonuniform=nonuniform, target=target, data_type=data_type) ctx.write_all_files() diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp index b2ed0836..02cf3aaf 100644 --- a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp +++ b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp @@ -41,119 +41,43 @@ static FUNC_PREFIX void d3q19srt_kernel_streamCollide(double * RESTRICT const _d { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double u0Mu1 = u_0 + u_1*-1.0; + const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; - const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2); - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; + const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; } } } @@ -166,100 +90,62 @@ static FUNC_PREFIX void d3q19srt_kernel_collide(double * RESTRICT _data_pdfs, i { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double xi_1 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0]; - const double xi_2 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0]; - const double xi_3 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double xi_4 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0]; - const double xi_5 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0]; - const double xi_6 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0]; - const double xi_7 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0]; - const double xi_8 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0]; - const double xi_9 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double xi_10 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0]; - const double xi_11 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0]; - const double xi_12 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double xi_13 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0]; - const double xi_14 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0]; - const double xi_15 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0]; - const double xi_16 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0]; - const double xi_17 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0]; - const double xi_18 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0]; - const double xi_19 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0]; - const double vel0Term = xi_15 + xi_17 + xi_2 + xi_8 + xi_9; - const double vel1Term = xi_1 + xi_4 + xi_5 + xi_6; - const double vel2Term = xi_11 + xi_13 + xi_19; - const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_14 + xi_16 + xi_18 + xi_3 + xi_7; - const double u_0 = vel0Term + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_16*-1.0 + xi_5*-1.0; - const double u_1 = vel1Term + xi_12*-1.0 + xi_15*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_7*-1.0 + xi_9; - const double u_2 = vel2Term + xi_1*-1.0 + xi_10*-1.0 + xi_16*-1.0 + xi_17 + xi_18*-1.0 + xi_2*-1.0 + xi_6; - const double u0Mu1 = u_0 + u_1*-1.0; + const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3]; + const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3]; + const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3]; + const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3]; + const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3]; + const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3]; + const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3]; + const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3]; + const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3]; + const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3]; + const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double vel0Term = xi_15 + xi_3 + xi_4 + xi_6 + xi_7; + const double vel1Term = xi_12 + xi_14 + xi_16 + xi_2; + const double vel2Term = xi_1 + xi_18 + xi_8; + const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_11 + xi_13 + xi_17 + xi_19 + xi_5 + xi_9; + const double u_0 = vel0Term - xi_1 - xi_17 - xi_19 - xi_2 - xi_9; + const double u_1 = vel1Term - xi_10 - xi_13 - xi_15 - xi_17 + xi_3 - xi_8; + const double u_2 = vel2Term - xi_11 - xi_13 + xi_14 - xi_16 - xi_4 + xi_6 - xi_9; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; - const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2); - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 + xi_3*-1.0) + xi_3; - _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + xi_4*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_4; - _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + xi_7*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_7; - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + xi_14*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_14; - _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + xi_8*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_8; - _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + xi_13*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_13; - _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + xi_10*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_10; - _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + xi_5*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_5; - _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + xi_9*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_9; - _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + xi_12*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_12; - _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + xi_15*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15; - _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + xi_6*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_6; - _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + xi_19*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_19; - _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + xi_11*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_11; - _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + xi_17*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_17; - _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + xi_1*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_1; - _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + xi_18*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_18; - _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + xi_16*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_16; - _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + xi_2*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_2; + const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - xi_5) + xi_5; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - xi_12 + 0.33333333333333331*(u_1*u_1)) + xi_12; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - xi_10 + 0.33333333333333331*(u_1*u_1)) + xi_10; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - xi_19 + 0.33333333333333331*(u_0*u_0)) + xi_19; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - xi_7 + 0.33333333333333331*(u_0*u_0)) + xi_7; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - xi_11 + 0.33333333333333331*(u_2*u_2)) + xi_11; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - xi_2 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_2; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - xi_3 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_3; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - xi_17 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_17; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - xi_15 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - xi_14 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_14; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - xi_8 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_8; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - xi_1 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_1; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - xi_6 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_6; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - xi_16 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_16; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - xi_13 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_13; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - xi_9 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_9; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - xi_4 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_4; } } } @@ -272,124 +158,48 @@ static FUNC_PREFIX void d3q19srt_kernel_stream(double * RESTRICT const _data_pdf { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18; + const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18; } } } @@ -402,124 +212,48 @@ static FUNC_PREFIX void d3q19srt_kernel_streamOnlyNoAdvancement(double * RESTRIC { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18; + const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18; } } } @@ -532,80 +266,34 @@ static FUNC_PREFIX void d3q19srt_kernel_initialise(double * RESTRICT const _data { for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1) { - double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2; - double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2; - double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3; - double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1) { - double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30; - double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30; - double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31; - double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1) { - const double rho = _data_density_20_30_10[_stride_density_0*ctr_0]; + const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2]; const double delta_rho = rho - 1.0; - const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0]; - const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0]; - const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0]; - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2); - _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1); - _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1); - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0); - _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0); - _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2); - _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2); - _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); - _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); - _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); - _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); - _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); - _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); + const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2]; + const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3]; + const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3]; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2); } } } @@ -618,71 +306,25 @@ static FUNC_PREFIX void d3q19srt_kernel_getter(double * RESTRICT _data_density, { for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2; - double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2; - double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3; - double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3; for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30; - double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30; - double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31; - double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32; for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1) { - const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0]; - const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0]; - const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0]; + const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3]; + const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3]; const double rho = delta_rho + 1.0; const double u_0 = momdensity_0; const double u_1 = momdensity_1; const double u_2 = momdensity_2; - _data_density_20_30_10[_stride_density_0*ctr_0] = rho; - _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0; - _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1; - _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2; + _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2; } } } diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.h b/src/lbm_generated/sweep_collection/D3Q19SRT.h index 2fdb3850..56475b66 100644 --- a/src/lbm_generated/sweep_collection/D3Q19SRT.h +++ b/src/lbm_generated/sweep_collection/D3Q19SRT.h @@ -59,20 +59,20 @@ namespace lbm { class D3Q19SRT { -public: - enum Type { ALL = 0, INNER = 1, OUTER = 2 }; + public: + enum Type { ALL = 0, INNER = 1, OUTER = 2 }; D3Q19SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1)) - : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth) + : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth) { + validInnerOuterSplit_= true; + for (auto& iBlock : *blocks) { - if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || - int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || - int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2) - WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock") + if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2) + validInnerOuterSplit_ = false; } }; @@ -117,27 +117,33 @@ class D3Q19SRT std::function streamCollide(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamCollideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamCollideOuter(block); }; - default: - return [this](IBlock* block) { streamCollide(block); }; + case Type::INNER: + return [this](IBlock* block) { streamCollideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamCollideOuter(block); }; + default: + return [this](IBlock* block) { streamCollide(block); }; } } std::function streamCollide(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamCollideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamCollideOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamCollideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamCollideOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); }; } } @@ -298,14 +304,14 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci); } - + - pdfs->swapDataPointers(pdfs_tmp); + pdfs->swapDataPointers(pdfs_tmp); } @@ -317,27 +323,33 @@ class D3Q19SRT std::function collide(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { collideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { collideOuter(block); }; - default: - return [this](IBlock* block) { collide(block); }; + case Type::INNER: + return [this](IBlock* block) { collideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { collideOuter(block); }; + default: + return [this](IBlock* block) { collide(block); }; } } std::function collide(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { collideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { collideOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { collideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { collideOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); }; } } @@ -425,14 +437,14 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { collideCellInterval(pdfs, omega, ci); } - + - + } @@ -443,27 +455,33 @@ class D3Q19SRT std::function stream(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOuter(block); }; - default: - return [this](IBlock* block) { stream(block); }; + case Type::INNER: + return [this](IBlock* block) { streamInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOuter(block); }; + default: + return [this](IBlock* block) { stream(block); }; } } std::function stream(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); }; } } @@ -624,14 +642,14 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamCellInterval(pdfs, pdfs_tmp, ci); } - + - pdfs->swapDataPointers(pdfs_tmp); + pdfs->swapDataPointers(pdfs_tmp); } @@ -643,27 +661,33 @@ class D3Q19SRT std::function streamOnlyNoAdvancement(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; - default: - return [this](IBlock* block) { streamOnlyNoAdvancement(block); }; + case Type::INNER: + return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; + default: + return [this](IBlock* block) { streamOnlyNoAdvancement(block); }; } } std::function streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); }; } } @@ -821,14 +845,14 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci); } - + - + } @@ -839,27 +863,33 @@ class D3Q19SRT std::function initialise(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { initialiseInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { initialiseOuter(block); }; - default: - return [this](IBlock* block) { initialise(block); }; + case Type::INNER: + return [this](IBlock* block) { initialiseInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { initialiseOuter(block); }; + default: + return [this](IBlock* block) { initialise(block); }; } } std::function initialise(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { initialiseInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { initialiseOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { initialiseInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { initialiseOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); }; } } @@ -870,9 +900,9 @@ class D3Q19SRT const cell_idx_t ghost_layers = 0; - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -884,9 +914,9 @@ class D3Q19SRT { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -898,9 +928,9 @@ class D3Q19SRT void initialiseCellInterval(IBlock * block, const CellInterval & ci) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -910,9 +940,9 @@ class D3Q19SRT void initialiseInner(IBlock * block) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -926,9 +956,9 @@ class D3Q19SRT void initialiseOuter(IBlock * block) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -957,14 +987,14 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { initialiseCellInterval(density, pdfs, velocity, ci); } - + - + } @@ -975,27 +1005,33 @@ class D3Q19SRT std::function calculateMacroscopicParameters(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; - default: - return [this](IBlock* block) { calculateMacroscopicParameters(block); }; + case Type::INNER: + return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; + default: + return [this](IBlock* block) { calculateMacroscopicParameters(block); }; } } std::function calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); }; } } @@ -1006,9 +1042,9 @@ class D3Q19SRT const cell_idx_t ghost_layers = 0; - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -1020,9 +1056,9 @@ class D3Q19SRT { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -1034,9 +1070,9 @@ class D3Q19SRT void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -1046,9 +1082,9 @@ class D3Q19SRT void calculateMacroscopicParametersInner(IBlock * block) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -1062,9 +1098,9 @@ class D3Q19SRT void calculateMacroscopicParametersOuter(IBlock * block) { - auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto velocity = block->getData< field::GhostLayerField >(velocityID); auto density = block->getData< field::GhostLayerField >(densityID); + auto velocity = block->getData< field::GhostLayerField >(velocityID); + auto pdfs = block->getData< field::GhostLayerField >(pdfsID); @@ -1093,32 +1129,33 @@ class D3Q19SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci); } - + - + } - private: - shared_ptr< StructuredBlockStorage > blocks_; - BlockDataID pdfsID; + private: + shared_ptr< StructuredBlockStorage > blocks_; + BlockDataID pdfsID; BlockDataID densityID; BlockDataID velocityID; double omega_; private: std::set< field::GhostLayerField *, field::SwapableCompare< field::GhostLayerField * > > cache_pdfs_; - Cell outerWidth_; - std::vector layers_; + Cell outerWidth_; + std::vector layers_; + bool validInnerOuterSplit_; - + }; diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp index ce89749f..24a8f232 100644 --- a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp +++ b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp @@ -41,159 +41,51 @@ static FUNC_PREFIX void d3q27srt_kernel_streamCollide(double * RESTRICT const _d { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321; - double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319; - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325; - double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; - double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319; - double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320; - double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321; - double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322; - double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323; - double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324; - double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325; - double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double u0Mu1 = u_0 + u_1*-1.0; + const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]; + const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2); - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]; } } } @@ -206,132 +98,78 @@ static FUNC_PREFIX void d3q27srt_kernel_collide(double * RESTRICT _data_pdfs, i { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326; - double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323; - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; - double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double xi_1 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0]; - const double xi_2 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0]; - const double xi_3 = _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0]; - const double xi_4 = _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0]; - const double xi_5 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0]; - const double xi_6 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0]; - const double xi_7 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0]; - const double xi_8 = _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0]; - const double xi_9 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0]; - const double xi_10 = _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0]; - const double xi_11 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0]; - const double xi_12 = _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0]; - const double xi_13 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0]; - const double xi_14 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double xi_15 = _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0]; - const double xi_16 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0]; - const double xi_17 = _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0]; - const double xi_18 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double xi_19 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0]; - const double xi_20 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double xi_21 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0]; - const double xi_22 = _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0]; - const double xi_23 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0]; - const double xi_24 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0]; - const double xi_25 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0]; - const double xi_26 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0]; - const double xi_27 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0]; - const double vel0Term = xi_12 + xi_14 + xi_15 + xi_24 + xi_25 + xi_26 + xi_4 + xi_5 + xi_8; - const double vel1Term = xi_10 + xi_11 + xi_13 + xi_17 + xi_21 + xi_9; - const double vel2Term = xi_1 + xi_19 + xi_22 + xi_7; - const double delta_rho = vel0Term + vel1Term + vel2Term + xi_16 + xi_18 + xi_2 + xi_20 + xi_23 + xi_27 + xi_3 + xi_6; - const double u_0 = vel0Term + xi_1*-1.0 + xi_10*-1.0 + xi_11*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_23*-1.0 + xi_27*-1.0 + xi_3*-1.0; - const double u_1 = vel1Term + xi_12 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_3*-1.0 + xi_4 + xi_5*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0; - const double u_2 = vel2Term + xi_10*-1.0 + xi_12 + xi_15*-1.0 + xi_17 + xi_2*-1.0 + xi_21*-1.0 + xi_23*-1.0 + xi_24 + xi_25*-1.0 + xi_3*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_8 + xi_9; - const double u0Mu1 = u_0 + u_1*-1.0; + const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3]; + const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3]; + const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3]; + const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3]; + const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3]; + const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3]; + const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3]; + const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3]; + const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3]; + const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3]; + const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3]; + const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3]; + const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3]; + const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3]; + const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3]; + const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double xi_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double xi_21 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double xi_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3]; + const double xi_23 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3]; + const double xi_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3]; + const double xi_25 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double xi_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double xi_27 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double vel0Term = xi_1 + xi_10 + xi_13 + xi_14 + xi_16 + xi_17 + xi_19 + xi_24 + xi_25; + const double vel1Term = xi_12 + xi_22 + xi_23 + xi_27 + xi_4 + xi_9; + const double vel2Term = xi_15 + xi_5 + xi_6 + xi_7; + const double delta_rho = vel0Term + vel1Term + vel2Term + xi_11 + xi_18 + xi_2 + xi_20 + xi_21 + xi_26 + xi_3 + xi_8; + const double u_0 = vel0Term - xi_11 - xi_12 - xi_15 - xi_20 - xi_23 - xi_26 - xi_27 - xi_5 - xi_8; + const double u_1 = vel1Term + xi_1 + xi_10 - xi_15 - xi_16 + xi_19 - xi_2 - xi_20 - xi_21 - xi_24 - xi_25 - xi_7 - xi_8; + const double u_2 = vel2Term - xi_1 + xi_10 - xi_11 + xi_12 - xi_13 - xi_16 + xi_17 - xi_18 - xi_2 - xi_22 - xi_23 + xi_24 - xi_8 + xi_9; + const double u0Mu1 = u_0 - u_1; const double u0Pu1 = u_0 + u_1; const double u1Pu2 = u_1 + u_2; - const double u1Mu2 = u_1 + u_2*-1.0; - const double u0Mu2 = u_0 + u_2*-1.0; + const double u1Mu2 = u_1 - u_2; + const double u0Mu2 = u_0 - u_2; const double u0Pu2 = u_0 + u_2; const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2); - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 + xi_20*-1.0) + xi_20; - _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + xi_13*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_13; - _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + xi_16*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_16; - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + xi_27*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_27; - _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + xi_26*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_26; - _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + xi_19*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_19; - _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + xi_2*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_2; - _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + xi_11*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_11; - _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + xi_14*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_14; - _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + xi_18*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_18; - _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + xi_5*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_5; - _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + xi_9*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9; - _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + xi_7*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7; - _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + xi_1*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_1; - _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + xi_24*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_24; - _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + xi_21*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_21; - _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + xi_6*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_6; - _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + xi_23*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_23; - _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + xi_25*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_25; - _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_12*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12; - _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_17*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_17; - _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_8*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_8; - _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_22*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_22; - _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_4*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_4; - _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_10*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_10; - _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_15*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_15; - _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_3*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_3; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - xi_3) + xi_3; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - xi_4 + 0.33333333333333331*(u_1*u_1)) + xi_4; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - xi_21 + 0.33333333333333331*(u_1*u_1)) + xi_21; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - xi_26 + 0.33333333333333331*(u_0*u_0)) + xi_26; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - xi_14 + 0.33333333333333331*(u_0*u_0)) + xi_14; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - xi_6 + 0.33333333333333331*(u_2*u_2)) + xi_6; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - xi_27 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_27; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - xi_19 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_19; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - xi_20 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_20; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - xi_25 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_25; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - xi_9 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - xi_7 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - xi_5 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_5; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - xi_17 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_17; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - xi_22 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_22; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - xi_2 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_2; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - xi_11 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_11; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - xi_13 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_13; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_10 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_10; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_12 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_24 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_24; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_15 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_15; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_1 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_1; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_23 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_23; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_16 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_16; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_8 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_8; } } } @@ -344,172 +182,64 @@ static FUNC_PREFIX void d3q27srt_kernel_stream(double * RESTRICT const _data_pdf { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319; - double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320; - double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321; - double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322; - double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323; - double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324; - double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325; - double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; - double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319; - double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320; - double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321; - double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322; - double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323; - double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324; - double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325; - double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18; - _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19; - _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20; - _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21; - _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22; - _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23; - _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24; - _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25; - _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26; + const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; + const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]; + const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]; + const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]; + const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]; + const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]; + const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]; + const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26; } } } @@ -522,172 +252,64 @@ static FUNC_PREFIX void d3q27srt_kernel_streamOnlyNoAdvancement(double * RESTRIC { for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3; - double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2; - double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3; - double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3; for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35; - double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36; - double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311; - double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312; - double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313; - double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314; - double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315; - double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316; - double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317; - double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318; - double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319; - double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320; - double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321; - double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322; - double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323; - double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324; - double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325; - double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326; - double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30; - double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31; - double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32; - double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33; - double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34; - double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35; - double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36; - double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37; - double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38; - double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39; - double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310; - double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311; - double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312; - double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313; - double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314; - double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315; - double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316; - double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317; - double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318; - double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319; - double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320; - double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321; - double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322; - double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323; - double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324; - double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325; - double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326; for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { - const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]; - const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0]; - const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0]; - const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0]; - const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0]; - const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0]; - const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0]; - const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0]; - const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0]; - _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0; - _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1; - _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2; - _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3; - _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4; - _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5; - _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6; - _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7; - _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8; - _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9; - _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10; - _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11; - _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12; - _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13; - _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14; - _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15; - _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16; - _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17; - _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18; - _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19; - _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20; - _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21; - _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22; - _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23; - _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24; - _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25; - _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26; + const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]; + const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]; + const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]; + const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]; + const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]; + const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]; + const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]; + const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]; + const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]; + const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]; + const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]; + const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]; + const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]; + const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]; + const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]; + const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]; + const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]; + const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]; + const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]; + const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]; + const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]; + const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]; + const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25; + _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26; } } } @@ -700,104 +322,42 @@ static FUNC_PREFIX void d3q27srt_kernel_initialise(double * RESTRICT const _data { for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1) { - double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2; - double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2; - double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3; - double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3; for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1) { - double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30; - double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30; - double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31; - double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; - double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319; - double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320; - double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321; - double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322; - double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323; - double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324; - double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325; - double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326; for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1) { - const double rho = _data_density_20_30_10[_stride_density_0*ctr_0]; + const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2]; const double delta_rho = rho - 1.0; - const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0]; - const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0]; - const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0]; - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2); - _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1); - _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1); - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0); - _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0); - _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2); - _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2); - _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); - _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); - _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); - _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); - _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); - _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); - _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2]; + const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3]; + const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3]; + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2); } } } @@ -810,87 +370,25 @@ static FUNC_PREFIX void d3q27srt_kernel_getter(double * RESTRICT _data_density, { for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1) { - double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3; - double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3; - double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2; - double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3; - double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2; - double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2; - double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3; - double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3; for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1) { - double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310; - double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314; - double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318; - double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319; - double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321; - double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323; - double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325; - double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34; - double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38; - double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313; - double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317; - double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320; - double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322; - double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324; - double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326; - double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33; - double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37; - double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39; - double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31; - double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311; - double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315; - double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312; - double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316; - double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32; - double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35; - double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30; - double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36; - double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30; - double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30; - double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31; - double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32; for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1) { - const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0]; - const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]; - const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0]; - const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]; - const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0]; + const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]; + const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]; + const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]; + const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3]; + const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]; + const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3]; const double rho = delta_rho + 1.0; const double u_0 = momdensity_0; const double u_1 = momdensity_1; const double u_2 = momdensity_2; - _data_density_20_30_10[_stride_density_0*ctr_0] = rho; - _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0; - _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1; - _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2; + _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1; + _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2; } } } diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.h b/src/lbm_generated/sweep_collection/D3Q27SRT.h index eb45b716..72ba7d41 100644 --- a/src/lbm_generated/sweep_collection/D3Q27SRT.h +++ b/src/lbm_generated/sweep_collection/D3Q27SRT.h @@ -59,20 +59,20 @@ namespace lbm { class D3Q27SRT { -public: - enum Type { ALL = 0, INNER = 1, OUTER = 2 }; + public: + enum Type { ALL = 0, INNER = 1, OUTER = 2 }; D3Q27SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1)) - : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth) + : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth) { + validInnerOuterSplit_= true; + for (auto& iBlock : *blocks) { - if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || - int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || - int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2) - WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock") + if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2) + validInnerOuterSplit_ = false; } }; @@ -117,27 +117,33 @@ class D3Q27SRT std::function streamCollide(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamCollideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamCollideOuter(block); }; - default: - return [this](IBlock* block) { streamCollide(block); }; + case Type::INNER: + return [this](IBlock* block) { streamCollideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamCollideOuter(block); }; + default: + return [this](IBlock* block) { streamCollide(block); }; } } std::function streamCollide(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamCollideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamCollideOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamCollideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamCollideOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); }; } } @@ -298,14 +304,14 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci); } - + - pdfs->swapDataPointers(pdfs_tmp); + pdfs->swapDataPointers(pdfs_tmp); } @@ -317,27 +323,33 @@ class D3Q27SRT std::function collide(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { collideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { collideOuter(block); }; - default: - return [this](IBlock* block) { collide(block); }; + case Type::INNER: + return [this](IBlock* block) { collideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { collideOuter(block); }; + default: + return [this](IBlock* block) { collide(block); }; } } std::function collide(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { collideInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { collideOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { collideInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { collideOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); }; } } @@ -425,14 +437,14 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { collideCellInterval(pdfs, omega, ci); } - + - + } @@ -443,27 +455,33 @@ class D3Q27SRT std::function stream(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOuter(block); }; - default: - return [this](IBlock* block) { stream(block); }; + case Type::INNER: + return [this](IBlock* block) { streamInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOuter(block); }; + default: + return [this](IBlock* block) { stream(block); }; } } std::function stream(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); }; } } @@ -624,14 +642,14 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamCellInterval(pdfs, pdfs_tmp, ci); } - + - pdfs->swapDataPointers(pdfs_tmp); + pdfs->swapDataPointers(pdfs_tmp); } @@ -643,27 +661,33 @@ class D3Q27SRT std::function streamOnlyNoAdvancement(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; - default: - return [this](IBlock* block) { streamOnlyNoAdvancement(block); }; + case Type::INNER: + return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; + default: + return [this](IBlock* block) { streamOnlyNoAdvancement(block); }; } } std::function streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); }; } } @@ -821,14 +845,14 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci); } - + - + } @@ -839,27 +863,33 @@ class D3Q27SRT std::function initialise(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { initialiseInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { initialiseOuter(block); }; - default: - return [this](IBlock* block) { initialise(block); }; + case Type::INNER: + return [this](IBlock* block) { initialiseInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { initialiseOuter(block); }; + default: + return [this](IBlock* block) { initialise(block); }; } } std::function initialise(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { initialiseInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { initialiseOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { initialiseInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { initialiseOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); }; } } @@ -870,9 +900,9 @@ class D3Q27SRT const cell_idx_t ghost_layers = 0; + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -884,9 +914,9 @@ class D3Q27SRT { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -898,9 +928,9 @@ class D3Q27SRT void initialiseCellInterval(IBlock * block, const CellInterval & ci) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -910,9 +940,9 @@ class D3Q27SRT void initialiseInner(IBlock * block) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -926,9 +956,9 @@ class D3Q27SRT void initialiseOuter(IBlock * block) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -957,14 +987,14 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { initialiseCellInterval(density, pdfs, velocity, ci); } - + - + } @@ -975,27 +1005,33 @@ class D3Q27SRT std::function calculateMacroscopicParameters(Type type) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; - default: - return [this](IBlock* block) { calculateMacroscopicParameters(block); }; + case Type::INNER: + return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; + default: + return [this](IBlock* block) { calculateMacroscopicParameters(block); }; } } std::function calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers) { + if (!validInnerOuterSplit_ && type != Type::ALL) + WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding") + switch (type) { - case Type::INNER: - return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; - case Type::OUTER: - return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; - default: - return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); }; + case Type::INNER: + return [this](IBlock* block) { calculateMacroscopicParametersInner(block); }; + case Type::OUTER: + return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); }; + default: + return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); }; } } @@ -1006,9 +1042,9 @@ class D3Q27SRT const cell_idx_t ghost_layers = 0; + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -1020,9 +1056,9 @@ class D3Q27SRT { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -1034,9 +1070,9 @@ class D3Q27SRT void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -1046,9 +1082,9 @@ class D3Q27SRT void calculateMacroscopicParametersInner(IBlock * block) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -1062,9 +1098,9 @@ class D3Q27SRT void calculateMacroscopicParametersOuter(IBlock * block) { + auto density = block->getData< field::GhostLayerField >(densityID); auto velocity = block->getData< field::GhostLayerField >(velocityID); auto pdfs = block->getData< field::GhostLayerField >(pdfsID); - auto density = block->getData< field::GhostLayerField >(densityID); @@ -1093,32 +1129,33 @@ class D3Q27SRT layers_.push_back(ci); } - + for( auto & ci: layers_ ) { calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci); } - + - + } - private: - shared_ptr< StructuredBlockStorage > blocks_; - BlockDataID pdfsID; + private: + shared_ptr< StructuredBlockStorage > blocks_; + BlockDataID pdfsID; BlockDataID densityID; BlockDataID velocityID; double omega_; private: std::set< field::GhostLayerField *, field::SwapableCompare< field::GhostLayerField * > > cache_pdfs_; - Cell outerWidth_; - std::vector layers_; + Cell outerWidth_; + std::vector layers_; + bool validInnerOuterSplit_; - + }; diff --git a/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py index bdc20860..ad270861 100644 --- a/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py +++ b/src/lbm_generated/sweep_collection/sweep_collection_generation_script.py @@ -25,7 +25,7 @@ relaxation_rate = sp.symbols("omega") streaming_pattern = 'pull' - pdfs = fields(f"pdfs({stencil.Q}): {data_type}[{stencil.D}D]", layout='fzyx') + pdfs, pdfs_tmp = fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {data_type}[3D]", layout='fzyx') density_field, velocity_field = fields(f"density(1), velocity({stencil.D}): {data_type}[{stencil.D}D]", layout='fzyx') @@ -33,13 +33,12 @@ lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate, streaming_pattern=streaming_pattern) - lbm_opt = LBMOptimisation(cse_global=False) + lbm_opt = LBMOptimisation(cse_global=False, symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp) collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt) generate_lbm_sweep_collection(ctx, f'{stencil.name}{method.name}', collision_rule, - streaming_pattern='pull', - field_layout='zyxf', + lbm_config=lbm_config, lbm_optimisation=lbm_opt, refinement_scaling=None, macroscopic_fields=macroscopic_fields, target=target, data_type=data_type, diff --git a/src/mesh_common/MeshOperations.h b/src/mesh_common/MeshOperations.h index 159481e6..ecaec8eb 100644 --- a/src/mesh_common/MeshOperations.h +++ b/src/mesh_common/MeshOperations.h @@ -91,6 +91,19 @@ typename MeshType::Point principalComponent( const MeshType & mesh, InputIterato template< typename MeshType > typename MeshType::Point principalComponent( const MeshType & mesh, const uint_t iterations = uint_t( 10 ) ); +/** +* \brief Color the faces of a mesh according to its vertices +* +* Iterate over all faces and colors them in their vertex color. +* If no uniform coloring of the vertices is given, a default color is taken. +* +* \tparam MeshType The type of the Mesh +* +* \param mesh The Mesh source mesh +* \param defaultColor Default color if no uniform coloring is given + */ +template< typename MeshType > +void vertexToFaceColor( const MeshType & mesh, const typename MeshType::Color& defaultColor); template< typename MeshType > math::GenericAABB< typename MeshType::Scalar > computeAABB( const MeshType & mesh ) @@ -735,6 +748,34 @@ typename MeshType::Point principalComponent( const MeshType & mesh, const uint_t return principalComponent( mesh, mesh.faces_begin(), mesh.faces_end(), iterations ); } +template< typename MeshType > +void vertexToFaceColor( const MeshType & mesh, const typename MeshType::Color& defaultColor) +{ + WALBERLA_CHECK(mesh.has_vertex_colors()) + mesh.request_face_colors(); + + for (auto faceIt = mesh.faces_begin(); faceIt != mesh.faces_end(); ++faceIt) + { + typename MeshType::Color vertexColor; + + bool useVertexColor = true; + + auto vertexIt = mesh.fv_iter(*faceIt); + WALBERLA_ASSERT(vertexIt.is_valid()) + + vertexColor = mesh.color(*vertexIt); + + ++vertexIt; + while (vertexIt.is_valid() && useVertexColor) + { + if (vertexColor != mesh.color(*vertexIt)) useVertexColor = false; + ++vertexIt; + } + + mesh.set_color(*faceIt, useVertexColor ? vertexColor : defaultColor); + } +} + } // namespace mesh diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp index 2e77ddcb..fef75d81 100644 --- a/tests/lbm_generated/Example.cpp +++ b/tests/lbm_generated/Example.cpp @@ -145,7 +145,7 @@ int main(int argc, char** argv) auto domainSetup = walberlaEnv.config()->getOneBlock("DomainSetup"); auto parameters = walberlaEnv.config()->getOneBlock("Parameters"); - auto omega = parameters.getParameter< real_t >("omega", real_c(1.4)); + real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4)); auto timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)) + uint_c(1); auto refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1)); diff --git a/tests/lbm_generated/Example.py b/tests/lbm_generated/Example.py index c4e3eaa9..17d1c1c1 100644 --- a/tests/lbm_generated/Example.py +++ b/tests/lbm_generated/Example.py @@ -25,12 +25,12 @@ stencil = LBStencil(Stencil.D3Q19) pdfs, vel_field = fields(f"pdfs({stencil.Q}), velocity({stencil.D}): {data_type}[{stencil.D}D]", layout='fzyx') - + macroscopic_fields = {'velocity': vel_field} lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, streaming_pattern=streaming_pattern) - lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx') + lbm_opt = LBMOptimisation(cse_global=False, symbolic_field=pdfs, field_layout='fzyx') method = create_lb_method(lbm_config=lbm_config) collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)