diff --git a/.gitignore b/.gitignore
index 9b148120..76b5902a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ qrc_*
# CLion indexing
*.uuid
+.fleet
# Generated files
@@ -32,11 +33,13 @@ qrc_*
# Visual Studio Code
/.vscode
+# Zed
+/.cache*
+
# CLion
*.idea
*.clion*
-
# QtCreator
CMakeLists.txt.user.*
diff --git a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
index 2b37ed6f..1a58f36d 100644
--- a/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
+++ b/apps/benchmarks/NonUniformGridCPU/CMakeLists.txt
@@ -11,11 +11,6 @@ waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
NonUniformGridCPUBoundaryCollection.h
NonUniformGridCPUInfoHeader.h)
-waLBerla_add_executable( NAME NonUniformGridGenerator
- FILES NonUniformGridGenerator.cpp LdcSetup.h
- DEPENDS blockforest core field python_coupling )
-
-
waLBerla_add_executable( NAME NonUniformGridCPU
- FILES NonUniformGridCPU.cpp LdcSetup.h
+ FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h
DEPENDS blockforest boundary core domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridCPUGenerated )
diff --git a/apps/benchmarks/NonUniformGridCPU/GridGeneration.h b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h
new file mode 100644
index 00000000..ec7069bc
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridCPU/GridGeneration.h
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see .
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include
+
+#include "LdcSetup.h"
+#include "NonUniformGridCPUInfoHeader.h"
+
+using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
+using Stencil_T = StorageSpecification_T::Stencil;
+
+using namespace walberla;
+
+/// Creates, refines and load-balances the SetupBlockForest for the lid-driven-cavity benchmark.
+/// When running on a single process the forest is also saved to "<blockForestFilestem>.bfs",
+/// optionally written as VTK, and optionally summarised in the log. If useMPIManager is set,
+/// the process count of the MPIManager replaces the "numProcesses" configuration value.
+inline void createSetupBlockForest(SetupBlockForest& setupBfs,
+                                   const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
+                                   const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   const Vector3< real_t > domainSize    = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   const Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   const Vector3< uint_t > rootBlocks    = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   const Vector3< bool >   periodic      = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
+   uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
+   const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
+   const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
+   const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
+
+   if(useMPIManager)
+      numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
+
+   const LDC ldc(refinementDepth);
+   auto refSelection = ldc.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function(refSelection));
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   // Everything below (file output, statistics) is only done by a single-process run;
+   // with more than one process the balanced forest is merely handed back to the caller.
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   // Stored as "<blockForestFilestem>.bfs", the file createBlockForest() looks for in parallel runs.
+   setupBfs.saveToFile((blockForestFilestem + ".bfs").c_str());
+   if(writeVtk)
+      setupBfs.writeVTKOutput(blockForestFilestem);
+
+   if(outputStatistics){
+      WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++)
+      {
+         const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+      }
+
+      const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+      WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+      const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+      const real_t averageCellsPerProcess = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+
+      const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+      const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+      const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
+      const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+      const double expectedMemoryPerProcess = double_c(averageCellsPerProcess * valuesPerCell * sizePerValue) * 1e-9;
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+      WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+      WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per process will be " << expectedMemoryPerProcess << " GB")
+
+      WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
+   }
+}
+
+/// Provides the BlockForest of this process in bfs. A multi-process run loads it from
+/// "<blockForestFilestem>.bfs" if that file can be opened and otherwise builds the setup forest on
+/// the fly; a single-process run always builds (and thereby saves) the setup forest itself.
+inline void createBlockForest(shared_ptr< BlockForest >& bfs,
+                              const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
+{
+   const uint_t rank = uint_c(mpi::MPIManager::instance()->worldRank());
+
+   if (mpi::MPIManager::instance()->numProcesses() > 1)
+   {
+      const std::string setupBlockForestFilepath =
+         blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest") + ".bfs";
+
+      // The probe stream is a temporary, so the file is closed again before BlockForest reads it.
+      if (std::ifstream(setupBlockForestFilepath).good())
+      {
+         bfs = std::make_shared< BlockForest >(rank, setupBlockForestFilepath.c_str(), false);
+         return;
+      }
+
+      WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
+      bfs = std::make_shared< BlockForest >(rank, setupBfs);
+   }
+   else
+   {
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
+      bfs = std::make_shared< BlockForest >(rank, setupBfs);
+   }
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
index 070656cb..6fe5a508 100644
--- a/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
+++ b/apps/benchmarks/NonUniformGridCPU/LdcSetup.h
@@ -48,14 +48,8 @@ class LDCRefinement
{
const AABB & domain = forest.getDomain();
- const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 );
- const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 );
-
- const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
- domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
-
- const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
- domain.xMax(), domain.yMin() + ySize, domain.zMax() );
+ const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
+ const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
index 34fde13e..84b78291 100644
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
@@ -38,6 +38,7 @@
#include
+#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
@@ -77,23 +78,25 @@ int main(int argc, char** argv)
auto config = *cfg;
logging::configureLogging(config);
-
+ auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
+ const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
+
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
- auto domainSetup = config->getOneBlock("DomainSetup");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
- // Load structured block forest from file
- std::ostringstream oss;
- oss << blockForestFilestem << ".bfs";
- const std::string setupBlockForestFilepath = oss.str();
+ shared_ptr< BlockForest > bfs;
+ createBlockForest(bfs, domainSetup, blockForestSetup);
+
+ if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
+ {
+ WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
+ return EXIT_SUCCESS;
+ }
- WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
- auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
- setupBlockForestFilepath.c_str(), false);
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
@@ -173,6 +176,8 @@ int main(int argc, char** argv)
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
+
+ auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
@@ -180,6 +185,12 @@ int main(int argc, char** argv)
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel");
vtkOutput->addCellDataWriter(velWriter);
+ if (parameters.getParameter< bool >("writeOnlySlice", true)){
+ const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth),
+ finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth));
+ vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+ }
+
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
@@ -236,6 +247,8 @@ int main(int argc, char** argv)
pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+ pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+ pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess",
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
index 25e93420..368fd569 100644
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.py
@@ -23,17 +23,23 @@
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
+ cpu_vec = {"instruction_set": None}
- streaming_pattern = 'aa'
+ streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
+ method_enum = Method.CUMULANT
+
+ fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
+ collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
- lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega, compressible=True,
+ lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
+ fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
@@ -50,12 +56,12 @@
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
- target=ps.Target.CPU)
+ target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
- 'collision_setup': lbm_config.method.name.lower(),
+ 'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp
deleted file mode 100644
index d7eab304..00000000
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridGenerator.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-//======================================================================================================================
-//
-// This file is part of waLBerla. waLBerla is free software: you can
-// redistribute it and/or modify it under the terms of the GNU General Public
-// License as published by the Free Software Foundation, either version 3 of
-// the License, or (at your option) any later version.
-//
-// waLBerla is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-// for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with waLBerla (see COPYING.txt). If not, see .
-//
-//! \file NonUniformGridGenerator.cpp
-//! \author Frederik Hennig
-//
-//======================================================================================================================
-
-#include "blockforest/Initialization.h"
-#include "blockforest/SetupBlock.h"
-#include "blockforest/SetupBlockForest.h"
-#include "blockforest/loadbalancing/StaticCurve.h"
-
-#include "core/all.h"
-
-#include "python_coupling/CreateConfig.h"
-
-#include
-
-#include "LdcSetup.h"
-
-using namespace walberla;
-
-
-int main(int argc, char ** argv){
- const mpi::Environment env(argc, argv);
- mpi::MPIManager::instance()->useWorldComm();
-
- if(mpi::MPIManager::instance()->numProcesses() > 1){
- WALBERLA_ABORT("Commandment: Thou shalt not run thy grid generator with more than one process.");
- }
-
- for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
- {
- auto config = *cfg;
- auto domainSetup = config->getOneBlock("DomainSetup");
-
- Vector3 domainSize = domainSetup.getParameter >("domainSize");
- Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks");
- Vector3 periodic = domainSetup.getParameter >("periodic");
-
- auto blockForestSetup = config->getOneBlock("SetupBlockForest");
- const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
- const uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
- const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
- const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
- const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
-
- const LDC ldc(refinementDepth);
- SetupBlockForest setupBfs;
-
- auto refSelection = ldc.refinementSelector();
- setupBfs.addRefinementSelectionFunction(std::function(refSelection));
- const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
- setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
- setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
- setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
-
- {
- std::ostringstream oss;
- oss << blockForestFilestem << ".bfs";
- setupBfs.saveToFile(oss.str().c_str());
- }
-
- if(writeVtk){
- setupBfs.writeVTKOutput(blockForestFilestem);
- }
-
- if(outputStatistics){
- WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
- WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
- for (uint_t level = 0; level <= refinementDepth; level++)
- {
- const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
- WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
- }
-
- const uint_t avgBlocksPerProc = setupBfs.getNumberOfBlocks() / setupBfs.getNumberOfProcesses();
- WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
- WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
- }
-
-
- WALBERLA_LOG_INFO_ON_ROOT("Ending program")
- }
-}
diff --git a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
index 51a0220b..108b431f 100644
--- a/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/NonUniformGridCPU/simulation_setup/benchmark_configs.py
@@ -1,10 +1,25 @@
import waLBerla as wlb
+from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
+
DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
@@ -18,7 +33,8 @@ def __init__(self,
vtk_write_frequency=0,
logger_frequency=0,
blockforest_filestem="blockforest",
- write_setup_vtk=False):
+ write_setup_vtk=True,
+ db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
@@ -34,6 +50,8 @@ def __init__(self,
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
+ self.db_file_name = DB_FILE if db_file_name is None else db_file_name
+
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
@@ -51,7 +69,8 @@ def config(self, print_dict=True):
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
- 'outputStatistics': False
+ 'outputStatistics': True,
+ 'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
@@ -59,14 +78,15 @@ def config(self, print_dict=True):
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
- 'oneFilePerProcess': False
+ 'oneFilePerProcess': False,
+ 'writeOnlySlice': False
},
'Logging': {
'logLevel': "info",
}
}
- if (print_dict):
+ if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@@ -82,6 +102,15 @@ def results_callback(self, **kwargs):
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
+
+ if ms:
+ state = ms.MachineState(extended=False, anonymous=True)
+ state.generate() # generate subclasses
+ state.update() # read information
+ data["MachineState"] = str(state.get())
+ else:
+ print("MachineState module is not available. MachineState was not saved")
+
sequenceValuesToScalars(data)
result = data
@@ -92,52 +121,109 @@ def results_callback(self, **kwargs):
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
- checkAndUpdateSchema(result, table_name, DB_FILE)
- storeSingle(result, table_name, DB_FILE)
+ checkAndUpdateSchema(result, table_name, self.db_file_name)
+ storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
-def validation_run():
- """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
- wlb.log_info_on_root("Validation run")
+def weak_scaling_ldc(num_proc, uniform=False):
+ wlb.log_info_on_root("Running weak scaling benchmark...")
- domain_size = (96, 96, 96)
- cells_per_block = (32, 32, 32)
+ # This benchmark must run from 16 processes onwards
+ if wlb.mpi.numProcesses() > 1:
+ num_proc = wlb.mpi.numProcesses()
+
+ if uniform:
+ factor = 3 * num_proc
+ name = "uniform"
+ else:
+ if num_proc % 16 != 0:
+ raise RuntimeError("Number of processes must be dividable by 16")
+ factor = int(num_proc // 16)
+ name = "nonuniform"
+
+ cells_per_block = (WeakX, WeakY, WeakZ)
+ domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
- scenario = Scenario(domain_size=domain_size,
+ scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+ domain_size=domain_size,
root_blocks=root_blocks,
- num_processes=1,
- refinement_depth=1,
+ num_processes=num_proc,
cells_per_block=cells_per_block,
- timesteps=201,
- vtk_write_frequency=100,
- logger_frequency=5,
- write_setup_vtk=True)
+ refinement_depth=0 if uniform else 3,
+ timesteps=10,
+ db_file_name=f"weakScalingCPU{name}LDC.sqlite3")
+ scenarios.add(scenario)
+
+
+def strong_scaling_ldc(num_proc, uniform=False):
+    """Set up the strong-scaling LDC scenario; under MPI, num_proc becomes the actual process count."""
+    wlb.log_info_on_root("Running strong scaling benchmark...")
+
+    # This benchmark must run with a multiple of 64 processes
+    if wlb.mpi.numProcesses() > 1:
+        num_proc = wlb.mpi.numProcesses()
+
+    if num_proc % 64 != 0:
+        raise RuntimeError("Number of processes must be dividable by 64")
+
+    cells_per_block = (StrongX, StrongY, StrongZ)
+
+    if uniform:
+        domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
+        name = "uniform"
+    else:
+        # Floor division keeps block counts and cell counts exact integers
+        factor = num_proc // 64
+        blocks64 = block_decomposition(factor)
+        cells_per_block = tuple([c // b for c, b in zip(cells_per_block, reversed(blocks64))])
+        domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
+        name = "nonuniform"
+    root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
+    scenarios = wlb.ScenarioManager()
+    scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                        domain_size=domain_size,
+                        root_blocks=root_blocks,
+                        num_processes=num_proc,
+                        cells_per_block=cells_per_block,
+                        refinement_depth=0 if uniform else 3,
+                        timesteps=10,
+                        db_file_name=f"strongScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario)
-def scaling():
- wlb.log_info_on_root("Running scaling benchmark...")
+def validation_run():
+ """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
+ wlb.log_info_on_root("Validation run")
- numProc = wlb.mpi.numProcesses()
+ domain_size = (96, 96, 32)
+ cells_per_block = (32, 32, 32)
- domain_size = (256, 256, 128 * numProc)
- cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
+ num_processes=1,
+ refinement_depth=3,
cells_per_block=cells_per_block,
- refinement_depth=2,
- timesteps=10)
+ timesteps=1001,
+ vtk_write_frequency=100,
+ logger_frequency=5,
+ write_setup_vtk=True)
scenarios.add(scenario)
-validation_run()
-# scaling()
+if BENCHMARK == 0:
+ validation_run()
+elif BENCHMARK == 1:
+ weak_scaling_ldc(1, False)
+elif BENCHMARK == 2:
+ strong_scaling_ldc(1, False)
+else:
+ print(f"Invalid benchmark case {BENCHMARK}")
diff --git a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
index f6b4e1ff..c8b02785 100644
--- a/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/NonUniformGridGPU/CMakeLists.txt
@@ -11,5 +11,5 @@ waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
NonUniformGridGPUBoundaryCollection.h
NonUniformGridGPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridGPU
- FILES NonUniformGridGPU.cpp LdcSetup.h
+ FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h
DEPENDS blockforest boundary core gpu domain_decomposition field geometry lbm_generated python_coupling timeloop vtk NonUniformGridGPUGenerated )
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/GridGeneration.h b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h
new file mode 100644
index 00000000..5de0a45d
--- /dev/null
+++ b/apps/benchmarks/NonUniformGridGPU/GridGeneration.h
@@ -0,0 +1,139 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see .
+//
+//! \file GridGeneration.h
+//! \author Markus Holzer
+//
+//======================================================================================================================
+#pragma once
+
+#include "blockforest/Initialization.h"
+#include "blockforest/SetupBlock.h"
+#include "blockforest/SetupBlockForest.h"
+#include "blockforest/loadbalancing/StaticCurve.h"
+
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "core/timing/TimingPool.h"
+
+#include
+
+#include "LdcSetup.h"
+#include "NonUniformGridGPUInfoHeader.h"
+
+using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
+using Stencil_T = StorageSpecification_T::Stencil;
+
+using namespace walberla;
+
+/// Creates, refines and load-balances the SetupBlockForest for the lid-driven-cavity benchmark.
+/// When running on a single process the forest is also saved to "<blockForestFilestem>.bfs",
+/// optionally written as VTK, and optionally summarised in the log. If useMPIManager is set,
+/// the process count of the MPIManager replaces the "numProcesses" configuration value.
+inline void createSetupBlockForest(SetupBlockForest& setupBfs,
+                                   const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
+                                   const bool useMPIManager=false)
+{
+   WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
+
+   const Vector3< real_t > domainSize    = domainSetup.getParameter< Vector3< real_t > >("domainSize");
+   const Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
+   const Vector3< uint_t > rootBlocks    = domainSetup.getParameter< Vector3< uint_t > >("rootBlocks");
+   const Vector3< bool >   periodic      = domainSetup.getParameter< Vector3< bool > >("periodic");
+
+   const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
+   uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
+   const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
+   const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
+   const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
+
+   if(useMPIManager)
+      numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
+
+   const LDC ldc(refinementDepth);
+   auto refSelection = ldc.refinementSelector();
+   setupBfs.addRefinementSelectionFunction(std::function(refSelection));
+   const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
+   setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
+   setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
+   setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
+
+   // Everything below (file output, statistics) is only done by a single-process run;
+   // with more than one process the balanced forest is merely handed back to the caller.
+   if(mpi::MPIManager::instance()->numProcesses() > 1)
+      return;
+
+   // Stored as "<blockForestFilestem>.bfs", the file createBlockForest() looks for in parallel runs.
+   setupBfs.saveToFile((blockForestFilestem + ".bfs").c_str());
+   if(writeVtk)
+      setupBfs.writeVTKOutput(blockForestFilestem);
+
+   if(outputStatistics){
+      WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
+      WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
+      for (uint_t level = 0; level <= refinementDepth; level++){
+         const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
+         WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
+      }
+
+      const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
+      WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
+
+      const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+      const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
+
+      const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
+      const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
+      const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
+      const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
+      const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
+
+      WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
+      WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
+      WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
+
+      WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
+   }
+}
+
+/// Provides the BlockForest of this process in bfs. A multi-process run loads it from
+/// "<blockForestFilestem>.bfs" if that file can be opened and otherwise builds the setup forest on
+/// the fly; a single-process run always builds (and thereby saves) the setup forest itself.
+inline void createBlockForest(shared_ptr< BlockForest >& bfs,
+                              const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
+{
+   const uint_t rank = uint_c(mpi::MPIManager::instance()->worldRank());
+
+   if (mpi::MPIManager::instance()->numProcesses() > 1){
+      const std::string setupBlockForestFilepath =
+         blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest") + ".bfs";
+      // The probe stream is a temporary, so the file is closed again before BlockForest reads it.
+      if (std::ifstream(setupBlockForestFilepath).good()){
+         bfs = std::make_shared< BlockForest >(rank, setupBlockForestFilepath.c_str(), false);
+         return;
+      }
+
+      WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
+      bfs = std::make_shared< BlockForest >(rank, setupBfs);
+   }
+   else{
+      SetupBlockForest setupBfs;
+      createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
+      bfs = std::make_shared< BlockForest >(rank, setupBfs);
+   }
+}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
index 238943a7..b8431f2f 100644
--- a/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
+++ b/apps/benchmarks/NonUniformGridGPU/LdcSetup.h
@@ -31,7 +31,9 @@
#include "field/FlagUID.h"
using namespace walberla;
+
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
+
using FlagField_T = FlagField< uint8_t >;
class LDCRefinement
@@ -46,14 +48,8 @@ class LDCRefinement
{
const AABB & domain = forest.getDomain();
- const real_t xSize = ( domain.xSize() / real_t(12) ) * real_c( 0.99 );
- const real_t ySize = ( domain.ySize() / real_t(12) ) * real_c( 0.99 );
-
- const AABB leftCorner( domain.xMin(), domain.yMin(), domain.zMin(),
- domain.xMin() + xSize, domain.yMin() + ySize, domain.zMax() );
-
- const AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMin(),
- domain.xMax(), domain.yMin() + ySize, domain.zMax() );
+ const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
+ const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
@@ -99,8 +95,7 @@ class LDC
Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
- else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
- globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
+ else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
{
flagField->addFlag(localCell, noslipFlag);
}
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
index 23310334..818f612b 100644
--- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
@@ -19,7 +19,6 @@
//======================================================================================================================
#include "blockforest/Initialization.h"
-#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
@@ -34,67 +33,55 @@
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
-#include "gpu/FieldCopy.h"
#include "gpu/ErrorChecking.h"
+#include "gpu/FieldCopy.h"
#include "gpu/HostFieldAllocator.h"
#include "gpu/ParallelStreams.h"
#include "gpu/communication/NonUniformGPUScheme.h"
-#include "lbm_generated/evaluation/PerformanceEvaluation.h"
-#include "lbm_generated/field/PdfField.h"
-#include "lbm_generated/field/AddToStorage.h"
-#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
-#include "lbm_generated/gpu/GPUPdfField.h"
-#include "lbm_generated/gpu/AddToStorage.h"
-#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
-
#include "python_coupling/CreateConfig.h"
+#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include
+#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
+#include "lbm_generated/field/AddToStorage.h"
+#include "lbm_generated/field/PdfField.h"
+#include "lbm_generated/gpu/AddToStorage.h"
+#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
+#include "lbm_generated/gpu/GPUPdfField.h"
+#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
-using Stencil_T = StorageSpecification_T::Stencil;
+using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
-using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
-using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
-using FlagField_T = FlagField< uint8_t >;
+using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
+using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
+using FlagField_T = FlagField< uint8_t >;
using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
using gpu::communication::NonUniformGPUScheme;
-namespace {
-void createSetupBlockForest(SetupBlockForest& setupBfs, const Config::BlockHandle& domainSetup, LDC& ldcSetup, const uint_t numProcesses=uint_c(MPIManager::instance()->numProcesses())) {
- Vector3 domainSize = domainSetup.getParameter >("domainSize");
- Vector3 rootBlocks = domainSetup.getParameter >("rootBlocks");
- Vector3 periodic = domainSetup.getParameter >("periodic");
-
- auto refSelection = ldcSetup.refinementSelector();
- setupBfs.addRefinementSelectionFunction(std::function(refSelection));
- const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
- setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
- setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
- setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
-}
-}
-
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
gpu::selectDeviceBasedOnMpiRank();
+   // argv[1] is a null pointer when no input file is given; building a std::string from it would be
+   // undefined behavior, so fall back to an empty name (which is never treated as a Python script).
+   const std::string input_filename(argc > 1 ? argv[1] : "");
+   const bool inputIsPython = string_ends_with(input_filename, ".py");
+
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_WORLD_BARRIER()
-
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -103,68 +90,32 @@ int main(int argc, char** argv)
auto config = *cfg;
logging::configureLogging(config);
- auto domainSetup = config->getOneBlock("DomainSetup");
+ auto domainSetup = config->getOneBlock("DomainSetup");
+ auto blockForestSetup = config->getOneBlock("SetupBlockForest");
+ const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
+
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
- auto parameters = config->getOneBlock("Parameters");
- const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4));
- const uint_t refinementDepth = parameters.getParameter< uint_t >("refinementDepth", uint_c(1));
- const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
- const bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
- const bool writeSetupForestAndReturn = parameters.getParameter< bool >("writeSetupForestAndReturn", false);
- const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
- const uint_t numProcesses = parameters.getParameter< uint_t >( "numProcesses");
-
- auto ldc = std::make_shared< LDC >(refinementDepth );
- SetupBlockForest setupBfs;
- if (writeSetupForestAndReturn)
- {
- WALBERLA_LOG_INFO_ON_ROOT("Creating SetupBlockForest for " << numProcesses << " processes")
- WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
- createSetupBlockForest(setupBfs, domainSetup, *ldc, numProcesses);
-
- WALBERLA_ROOT_SECTION() { setupBfs.writeVTKOutput("SetupBlockForest"); }
+ auto parameters = config->getOneBlock("Parameters");
+ const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95));
+ const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
+ const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
- WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
- uint_t totalCellUpdates( 0.0 );
- for (uint_t level = 0; level <= refinementDepth; level++)
- {
- const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
- const uint_t numberOfCells = numberOfBlocks * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
- totalCellUpdates += timesteps * math::uintPow2(level) * numberOfCells;
- WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
- }
- cudaDeviceProp prop{};
- WALBERLA_GPU_CHECK(gpuGetDeviceProperties(&prop, 0))
-
- const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
+ shared_ptr< BlockForest > bfs;
+ createBlockForest(bfs, domainSetup, blockForestSetup);
- const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
- const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
- const uint_t sizePerValue = sizeof(PdfField_T::value_type);
- const double totalGPUMem = double_c(prop.totalGlobalMem) * 1e-9;
- const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
-
- WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
- WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
- WALBERLA_LOG_INFO_ON_ROOT( "The total cell updates after " << timesteps << " timesteps (on the coarse level) will be " << totalCellUpdates)
- WALBERLA_LOG_INFO_ON_ROOT( "Total GPU memory " << totalGPUMem)
-
- WALBERLA_LOG_INFO_ON_ROOT("Ending program")
+      // Single-process runs with writeSetupForestAndReturn enabled stop here, before any field is allocated
+      if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
+      {
+         WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and written to file. Returning program")
return EXIT_SUCCESS;
}
- WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
- createSetupBlockForest(setupBfs, domainSetup, *ldc);
-
- // Create structured block forest
- WALBERLA_LOG_INFO_ON_ROOT("Creating structured block forest...")
- auto bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
- auto blocks = std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
+ auto blocks =
+ std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
- WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
- for (uint_t level = 0; level <= refinementDepth; level++)
+ WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels")
+ for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
@@ -172,26 +123,35 @@ int main(int argc, char** argv)
WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
- auto allocator = make_shared< gpu::HostFieldAllocator >();
- const BlockDataID pdfFieldCpuID = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
- const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
- const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
- const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
-
- const BlockDataID pdfFieldGpuID = lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
+ auto allocator = make_shared< gpu::HostFieldAllocator< real_t > >();
+ const BlockDataID pdfFieldCpuID =
+ lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
+ const BlockDataID velFieldCpuID =
+ field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
+ const BlockDataID densityFieldCpuID =
+ field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
+ const BlockDataID flagFieldID =
+ field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
+
+ const BlockDataID pdfFieldGpuID =
+ lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
const BlockDataID velFieldGpuID =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
const BlockDataID densityFieldGpuID =
gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "velocity on GPU", true);
WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
- const Cell innerOuterSplit = Cell(parameters.getParameter< Vector3 >("innerOuterSplit", Vector3(1, 1, 1)));
- Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
- SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
- for (auto& iBlock : *blocks)
- {
- sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr);
+ const Cell innerOuterSplit =
+ Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
+ Vector3< int32_t > gpuBlockSize =
+ parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
+ SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0],
+ gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
+
+ for (auto& iBlock : *blocks){
+ sweepCollection.initialise(&iBlock, cell_idx_c(1));
}
+ sweepCollection.initialiseBlockPointer();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
@@ -200,9 +160,11 @@ int main(int argc, char** argv)
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+ auto ldc = std::make_shared< LDC >(blocks->getDepth());
+
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
- geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
+ geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -210,8 +172,8 @@ int main(int argc, char** argv)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
- auto communication = std::make_shared< NonUniformGPUScheme > (blocks, cudaEnabledMPI);
- auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication(blocks, pdfFieldGpuID);
+ auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
+ auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID);
communication->addPackInfo(packInfo);
WALBERLA_MPI_BARRIER()
@@ -224,28 +186,31 @@ int main(int argc, char** argv)
sweepCollection.setOuterPriority(streamHighPriority);
auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
- lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
+ lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
+ LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
- // LBMMeshRefinement.test(5);
- // return EXIT_SUCCESS;
-
- if(benchmarkKernelOnly){
- timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
- }
- else{
- LBMMeshRefinement.addRefinementToTimeLoop(timeLoop);
- }
+ LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0));
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
- if (vtkWriteFrequency > 0)
- {
- auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
- "simulation_step", false, true, true, false, 0);
+ const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
+ const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
+
+ auto finalDomain = blocks->getDomain();
+ if (vtkWriteFrequency > 0){
+ auto vtkOutput =
+ vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step",
+ false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel");
vtkOutput->addCellDataWriter(velWriter);
+ if (parameters.getParameter< bool >("writeOnlySlice", true)){
+ const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()),
+ finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth()));
+ vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
+ }
+
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
@@ -260,17 +225,17 @@ int main(int argc, char** argv)
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
- if (remainingTimeLoggerFrequency > 0)
- {
+ if (remainingTimeLoggerFrequency > 0){
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
- lbm_generated::PerformanceEvaluation const performance(blocks, flagFieldID, fluidFlagUID);
- field::CellCounter< FlagField_T > fluidCells( blocks, flagFieldID, fluidFlagUID );
+ lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
+ field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
- WALBERLA_LOG_INFO_ON_ROOT( "Non uniform Grid benchmark with " << fluidCells.numberOfCells() << " fluid cells (in total on all levels)")
+ WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
+ << " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
@@ -294,6 +259,32 @@ int main(int argc, char** argv)
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+ WALBERLA_ROOT_SECTION()
+ {
+ if (inputIsPython)
+ {
+ python_coupling::PythonCallback pythonCallbackResults("results_callback");
+ if (pythonCallbackResults.isCallable())
+ {
+ pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes());
+ pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
+ pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+ pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+ pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
+ pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
+ pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
+ pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
+ pythonCallbackResults.data().exposeValue("stencil", infoStencil);
+ pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
+ pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
+ pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
+ pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
+ // Call Python function to report results
+ pythonCallbackResults();
+ }
+ }
+ }
}
return EXIT_SUCCESS;
}
\ No newline at end of file
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
index d523b5c0..a1f5bff2 100644
--- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.py
@@ -7,7 +7,7 @@
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
-from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -30,20 +30,25 @@
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
-
+
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
- streaming_pattern = 'pull'
+ streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
+ method_enum = Method.CUMULANT
+
+ fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
+ collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
- lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=omega,
+ lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
+ fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
@@ -66,7 +71,7 @@
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
- 'collision_setup': lbm_config.method.name.lower(),
+ 'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
diff --git a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
index 34bc6caa..6f7b6820 100644
--- a/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/NonUniformGridGPU/simulation_setup/benchmark_configs.py
@@ -1,16 +1,58 @@
import waLBerla as wlb
+from waLBerla.tools.config import block_decomposition
+from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
+import sqlite3
+import os
+import sys
+
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
+
+# All settings below are read from environment variables; the second argument is the fallback value.
+
+# Default sqlite file for Scenario.results_callback (used when no db_file_name is passed to Scenario)
+DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
+# Selects the run at the end of this script: 0 = validation, 1 = weak scaling, 2 = strong scaling
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+# Cells per block (x, y, z) used by weak_scaling_ldc
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+# Base cells per block (x, y, z) used by strong_scaling_ldc
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
- def __init__(self, domain_size=(64, 64, 64), root_blocks=(2, 2, 2),
- cells_per_block=(32, 32, 32), refinement_depth=0):
+ def __init__(self,
+ domain_size=(64, 64, 64),
+ root_blocks=(2, 2, 2),
+ num_processes=1,
+ refinement_depth=0,
+ cells_per_block=(32, 32, 32),
+ timesteps=101,
+ gpu_enabled_mpi=False,
+ vtk_write_frequency=0,
+ logger_frequency=30,
+ blockforest_filestem="blockforest",
+ write_setup_vtk=True,
+ db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
+ self.periodic = (0, 0, 1)
+
self.refinement_depth = refinement_depth
+ self.num_processes = num_processes
+ self.bfs_filestem = blockforest_filestem
+ self.write_setup_vtk = write_setup_vtk
+
+ self.timesteps = timesteps
+ self.gpu_enabled_mpi = gpu_enabled_mpi
+ self.vtk_write_frequency = vtk_write_frequency
+ self.logger_frequency = logger_frequency
- self.periodic = (0, 0, 0)
+ self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@@ -22,39 +64,79 @@ def config(self, print_dict=True):
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
- 'periodic': self.periodic
+ 'periodic': self.periodic,
+ },
+ 'SetupBlockForest': {
+ 'refinementDepth': self.refinement_depth,
+ 'numProcesses': self.num_processes,
+ 'blockForestFilestem': self.bfs_filestem,
+ 'writeVtk': self.write_setup_vtk,
+ 'outputStatistics': True,
+ 'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
- 'timesteps': 30001,
-
- 'refinementDepth': self.refinement_depth,
- 'writeSetupForestAndReturn': False,
- 'numProcesses': 1,
-
- 'cudaEnabledMPI': False,
- 'benchmarkKernelOnly': False,
-
- 'remainingTimeLoggerFrequency': 3,
-
- 'vtkWriteFrequency': 10000,
+ 'timesteps': self.timesteps,
+ 'remainingTimeLoggerFrequency': self.logger_frequency,
+ 'vtkWriteFrequency': self.vtk_write_frequency,
+ 'useVTKAMRWriter': True,
+ 'oneFilePerProcess': False,
+ 'writeOnlySlice': False,
+ 'gpuEnabledMPI': self.gpu_enabled_mpi,
+ 'gpuBlockSize': (128, 1, 1),
},
'Logging': {
'logLevel': "info",
}
}
- if print_dict and config_dict["Parameters"]["writeSetupForestAndReturn"] is False:
+ if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
+
return config_dict
+    @wlb.member_callback
+    def results_callback(self, **kwargs):
+        """Store the results of one benchmark run as a single row in the sqlite database.
+
+        The row combines the 'Parameters' and 'DomainSetup' config blocks, the values passed in
+        by the caller, build information and (if the machinestate module is installed) a
+        machine state dump.
+
+        :param kwargs: result values handed over by the caller of this callback
+        """
+        data = {}
+        data.update(self.config_dict['Parameters'])
+        data.update(self.config_dict['DomainSetup'])
+        data.update(kwargs)
+
+        data['executable'] = sys.argv[0]
+        data['compile_flags'] = wlb.build_info.compiler_flags
+        data['walberla_version'] = wlb.build_info.version
+        data['build_machine'] = wlb.build_info.build_machine
+
+        if ms:
+            state = ms.MachineState(extended=False, anonymous=True)
+            state.generate()  # generate subclasses
+            state.update()  # read information
+            data["MachineState"] = str(state.get())
+        else:
+            print("MachineState module is not available. MachineState was not saved")
+
+        # One pass is enough: the former second call operated on an alias of this same dict
+        sequenceValuesToScalars(data)
+
+        table_name = "runs"
+        num_tries = 4
+        # try multiple times, e.g. writing may fail when multiple benchmark processes are running
+        for num_try in range(num_tries):
+            try:
+                checkAndUpdateSchema(data, table_name, self.db_file_name)
+                storeSingle(data, table_name, self.db_file_name)
+                break
+            except sqlite3.OperationalError as e:
+                wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
+        else:
+            # reached only if no attempt hit `break`; make the data loss visible instead of silent
+            wlb.log_warning(f"Sqlite DB writing failed {num_tries} times. "
+                            f"Results were not stored in {self.db_file_name}")
+
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
- domain_size = (96, 96, 96)
- cells_per_block = (32, 32, 32)
+ domain_size = (192, 192, 64)
+ cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
@@ -62,8 +144,91 @@ def validation_run():
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
cells_per_block=cells_per_block,
- refinement_depth=1)
+ timesteps=0,
+ vtk_write_frequency=0,
+ refinement_depth=3,
+ gpu_enabled_mpi=False)
scenarios.add(scenario)
-validation_run()
+def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
+    """Register one weak-scaling scenario with the ScenarioManager.
+
+    The domain has 3 x 3 blocks in x/y and grows in z: 3 * num_proc blocks for the uniform
+    setup, num_proc // 16 blocks for the refined setup (which requires num_proc to be a
+    multiple of 16). When the script runs on more than one MPI process, that process count
+    replaces num_proc.
+    """
+    wlb.log_info_on_root("Running weak scaling benchmark...")
+
+    # A real MPI run overrides the requested number of processes
+    mpi_processes = wlb.mpi.numProcesses()
+    if mpi_processes > 1:
+        num_proc = mpi_processes
+
+    # The refined setup is only defined for multiples of 16 processes
+    if not uniform and num_proc % 16 != 0:
+        raise RuntimeError("Number of processes must be dividable by 16")
+
+    name = "uniform" if uniform else "nonuniform"
+    z_blocks = 3 * num_proc if uniform else int(num_proc // 16)
+
+    cells_per_block = (WeakX, WeakY, WeakZ)
+    blocks_per_axis = (3, 3, z_blocks)
+    domain_size = tuple(cells * blocks for cells, blocks in zip(cells_per_block, blocks_per_axis))
+    root_blocks = tuple(size // cells for size, cells in zip(domain_size, cells_per_block))
+
+    scenarios = wlb.ScenarioManager()
+    scenarios.add(Scenario(domain_size=domain_size,
+                           root_blocks=root_blocks,
+                           cells_per_block=cells_per_block,
+                           num_processes=num_proc,
+                           refinement_depth=0 if uniform else 3,
+                           timesteps=10,
+                           gpu_enabled_mpi=gpu_enabled_mpi,
+                           blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                           db_file_name=f"weakScalingGPU{name}LDC.sqlite3"))
+
+
+def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
+    """Register one strong-scaling scenario with the ScenarioManager.
+
+    num_proc must be a multiple of 64. The uniform setup uses 2 x 2 x 16 blocks of
+    (StrongX, StrongY, StrongZ) cells. The refined setup divides the cells per block by the
+    reversed block_decomposition(num_proc / 64) and uses 3 x 3 x (num_proc / 64) blocks.
+    When the script runs on more than one MPI process, that process count replaces num_proc.
+    """
+    wlb.log_info_on_root("Running strong scaling benchmark...")
+
+    # A real MPI run overrides the requested number of processes
+    mpi_processes = wlb.mpi.numProcesses()
+    if mpi_processes > 1:
+        num_proc = mpi_processes
+
+    if num_proc % 64 != 0:
+        raise RuntimeError("Number of processes must be dividable by 64")
+
+    base_cells = (StrongX, StrongY, StrongZ)
+    if uniform:
+        name = "uniform"
+        cells_per_block = base_cells
+        domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
+    else:
+        name = "nonuniform"
+        factor = int(num_proc / 64)
+        decomposition = block_decomposition(factor)
+        # shrink the blocks so that the total problem size stays fixed while processes are added
+        cells_per_block = tuple(int(cells / parts)
+                                for cells, parts in zip(base_cells, reversed(decomposition)))
+        domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
+
+    root_blocks = tuple(size // cells for size, cells in zip(domain_size, cells_per_block))
+
+    scenarios = wlb.ScenarioManager()
+    scenarios.add(Scenario(domain_size=domain_size,
+                           root_blocks=root_blocks,
+                           cells_per_block=cells_per_block,
+                           num_processes=num_proc,
+                           refinement_depth=0 if uniform else 3,
+                           timesteps=10,
+                           gpu_enabled_mpi=gpu_enabled_mpi,
+                           blockforest_filestem=f"blockforest_{name}_{num_proc}",
+                           db_file_name=f"strongScalingGPU{name}LDC.sqlite3"))
+
+
+# Map the BENCHMARK environment setting to the function that registers the matching scenario
+_benchmark_runners = {
+    0: validation_run,
+    1: lambda: weak_scaling_ldc(1, True, False),
+    2: lambda: strong_scaling_ldc(1, True, False),
+}
+_selected_runner = _benchmark_runners.get(BENCHMARK)
+if _selected_runner is None:
+    print(f"Invalid benchmark case {BENCHMARK}")
+else:
+    _selected_runner()
+
+
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
index 1b530d61..bb199b8f 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
+++ b/apps/benchmarks/PhaseFieldAllenCahn/CMakeLists.txt
@@ -15,10 +15,10 @@ waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen
if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
- DEPENDS blockforest core gpu field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
+ DEPENDS blockforest core gpu field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen)
else ()
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
- DEPENDS blockforest core field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
+ DEPENDS blockforest core field postprocessing python_coupling lbm_generated geometry timeloop gui BenchmarkPhaseFieldCodeGen)
endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
index 88a410c1..a703e9df 100755
--- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
@@ -8,6 +8,11 @@
import sys
from math import prod
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
+
def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
@@ -20,7 +25,9 @@ def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, s
class Scenario:
- def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
+ def __init__(self, time_step_strategy,
+ cuda_block_size,
+ cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = 0
@@ -89,6 +96,14 @@ def results_callback(self, **kwargs):
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
+ if ms:
+ state = ms.MachineState(extended=False, anonymous=True)
+ state.generate() # generate subclasses
+ state.update() # read information
+ data["MachineState"] = str(state.get())
+ else:
+ print("MachineState module is not available. MachineState was not saved")
+
sequenceValuesToScalars(data)
df = pd.DataFrame.from_records([data])
@@ -101,43 +116,19 @@ def results_callback(self, **kwargs):
def benchmark():
scenarios = wlb.ScenarioManager()
- gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+ gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
- block_size = (256, 256, 256)
+ block_size = (320, 320, 320)
+ cuda_enabled_mpi = True
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
else:
- scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size))
-
+ scenarios.add(Scenario(time_step_strategy='normal',
+ cuda_block_size=(128, 1, 1),
+ cells_per_block=block_size,
+ cuda_enabled_mpi=cuda_enabled_mpi))
-def kernel_benchmark():
- scenarios = wlb.ScenarioManager()
-
- gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
- gpu_mem = gpu_mem_gb * (2 ** 30)
- block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
-
- cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
- (32, 2, 1), (64, 2, 1), (128, 2, 1),
- (32, 4, 1), (64, 4, 1),
- (32, 4, 2),
- (32, 8, 1),
- (16, 16, 1)]
-
- for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
- for cuda_block in cuda_blocks:
- for block_size in block_sizes:
- if not domain_block_size_ok(block_size, gpu_mem):
- wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
- continue
- scenario = Scenario(time_step_strategy=time_step_strategy,
- cuda_block_size=cuda_block,
- cells_per_block=block_size)
- scenarios.add(scenario)
-
-
-# benchmark()
-kernel_benchmark()
+benchmark()
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
index 2a59e6be..fac902a0 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
+++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
@@ -29,6 +29,7 @@
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
+#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
@@ -78,14 +79,10 @@ int main(int argc, char** argv)
logging::configureLogging(config);
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
- Vector3< uint_t > cellsPerBlock =
- config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
- const real_t remainingTimeLoggerFrequency =
- parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(3.0));
const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
@@ -102,6 +99,7 @@ int main(int argc, char** argv)
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
BlockDataID phase_field_gpu =
gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
+ BlockDataID phase_field_tmp = gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "temporary phasefield", true);
#else
BlockDataID lb_phase_field =
field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_c(0.0), field::fzyx);
@@ -109,6 +107,7 @@ int main(int argc, char** argv)
field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_c(0.0), field::fzyx);
BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_c(0.0), field::fzyx);
+ BlockDataID phase_field_tmp = field::addToStorage< PhaseField_T >(blocks, "phase tmp", real_c(0.0), field::fzyx);
#endif
if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only")
@@ -139,47 +138,80 @@ int main(int argc, char** argv)
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
pystencils::phase_field_LB_step phase_field_LB_step(
- lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+ lb_phase_field_gpu, phase_field_gpu, phase_field_tmp, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
gpuBlockSize[1], gpuBlockSize[2]);
#else
pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
- pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field);
+ pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, phase_field_tmp, vel_field);
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field);
#endif
// add communication
#if defined(WALBERLA_BUILD_WITH_CUDA)
- const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
- auto Comm_velocity_based_distributions =
- make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
- auto generatedPackInfo_velocity_based_distributions =
- make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
- Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+ const bool gpuEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
+ const int streamLowPriority = 0;
+ const int streamHighPriority = 0;
+ auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
+ auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority);
+
+ auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field_gpu);
+ auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
- Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
- auto Comm_phase_field_distributions =
- make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
- auto generatedPackInfo_phase_field_distributions =
- make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
- Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
-#else
+ auto UniformGPUSchemeVelocityBasedDistributions = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false);
+ auto UniformGPUSchemePhaseFieldDistributions = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false);
+ auto UniformGPUSchemePhaseField = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false, 65432);
+
+ UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+ UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+ UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
+
+ auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
+ auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
- blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
+ auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
+ auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
+ auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
+
+ auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+ { // exchange the data pointers of the GPU phase field and its temporary counterpart on block b
+ auto phaseField = b->getData< gpu::GPUField< PhaseField_T::value_type > >(phase_field_gpu);
+ auto phaseFieldTMP = b->getData< gpu::GPUField< PhaseField_T::value_type > >(phase_field_tmp);
+ phaseField->swapDataPointers(phaseFieldTMP);
+ });
+
+#else
+
+ auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field);
+ auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
- auto generatedPackInfo_velocity_based_distributions =
- make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
- Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
- Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
+ auto UniformGPUSchemeVelocityBasedDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
+ auto UniformGPUSchemePhaseFieldDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
+ auto UniformGPUSchemePhaseField = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks, 65432);
+
+ UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
+ UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
+ UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
- blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
- auto generatedPackInfo_phase_field_distributions =
- make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field);
- Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
+ auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
+ auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
+
+ auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->communicate(); });
+ auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
+ auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
+
+ auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
+
+ auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
+ {
+ auto phaseField = b->getData< PhaseField_T >(phase_field);
+ auto phaseFieldTMP = b->getData< PhaseField_T >(phase_field_tmp);
+ phaseField->swapDataPointers(phaseFieldTMP);
+ });
#endif
BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
@@ -201,99 +233,37 @@ int main(int argc, char** argv)
init_h(&block);
init_g(&block);
}
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+ WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done")
}
+ SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
#if defined(WALBERLA_BUILD_WITH_CUDA)
- int const streamLowPriority = 0;
- int const streamHighPriority = 0;
- auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
- auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority);
-#endif
+ timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+ << Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step")
+ << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
- auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
-#if defined(WALBERLA_BUILD_WITH_CUDA)
- auto normalTimeStep = [&]() {
- Comm_velocity_based_distributions->startCommunication();
- for (auto& block : *blocks)
- phase_field_LB_step(&block, defaultStream);
- Comm_velocity_based_distributions->wait();
+ timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+ << Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
+ timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
+ << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
+
+ timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
- Comm_phase_field_distributions->startCommunication();
- for (auto& block : *blocks)
- hydro_LB_step(&block, defaultStream);
- Comm_phase_field_distributions->wait();
- };
- auto phase_only = [&]() {
- for (auto& block : *blocks)
- phase_field_LB_step(&block);
- };
- auto hydro_only = [&]() {
- for (auto& block : *blocks)
- hydro_LB_step(&block);
- };
- auto without_comm = [&]() {
- for (auto& block : *blocks)
- phase_field_LB_step(&block);
- for (auto& block : *blocks)
- hydro_LB_step(&block);
- };
#else
- auto normalTimeStep = [&]() {
- Comm_velocity_based_distributions.startCommunication();
- for (auto& block : *blocks)
- phase_field_LB_step(&block);
- Comm_velocity_based_distributions.wait();
-
- Comm_phase_field_distributions.startCommunication();
- for (auto& block : *blocks)
- hydro_LB_step(&block);
- Comm_phase_field_distributions.wait();
- };
- auto phase_only = [&]() {
- for (auto& block : *blocks)
- phase_field_LB_step(&block);
- };
- auto hydro_only = [&]() {
- for (auto& block : *blocks)
- hydro_LB_step(&block);
- };
- auto without_comm = [&]() {
- for (auto& block : *blocks)
- phase_field_LB_step(&block);
- for (auto& block : *blocks)
- hydro_LB_step(&block);
- };
-#endif
- std::function< void() > timeStep;
- if (timeStepStrategy == "phase_only")
- {
- timeStep = std::function< void() >(phase_only);
- WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking")
- }
- else if (timeStepStrategy == "hydro_only")
- {
- timeStep = std::function< void() >(hydro_only);
- WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking")
- }
- else if (timeStepStrategy == "kernel_only")
- {
- timeStep = std::function< void() >(without_comm);
- WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking")
- }
- else
- {
- timeStep = std::function< void() >(normalTimeStep);
- WALBERLA_LOG_INFO_ON_ROOT("normal timestep with overlapping")
- }
+ timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
+ << Sweep(phase_field_LB_step.getSweep(), "Phase LB Step")
+ << AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
- timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
+ timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
+ << Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
+ timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
+ << AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
- // remaining time logger
- if (remainingTimeLoggerFrequency > 0)
- timeLoop->addFuncAfterTimeStep(
- timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
- "remaining time logger");
+ timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
+#endif
uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
if (vtkWriteFrequency > 1)
@@ -307,40 +277,60 @@ int main(int argc, char** argv)
auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase");
vtkOutput->addCellDataWriter(phaseWriter);
- timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
+ timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
+ lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
+ field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
+ fluidCells();
+
+ WALBERLA_LOG_INFO_ON_ROOT("Multiphase benchmark with " << fluidCells.numberOfCells() << " fluid cells")
+ WALBERLA_LOG_INFO_ON_ROOT("Running " << warmupSteps << " timesteps to warm up the system")
+
for (uint_t i = 0; i < warmupSteps; ++i)
- timeLoop->singleStep();
+ timeloop.singleStep();
+
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+ WALBERLA_MPI_BARRIER()
+ WALBERLA_LOG_INFO_ON_ROOT("Warmup timesteps done")
- timeLoop->setCurrentTimeStepToZero();
+ timeloop.setCurrentTimeStepToZero();
+ WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
+ WcTimingPool timeloopTiming;
WcTimer simTimer;
#if defined(WALBERLA_BUILD_WITH_CUDA)
- cudaDeviceSynchronize();
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
#endif
simTimer.start();
- timeLoop->run();
+ timeloop.run(timeloopTiming);
#if defined(WALBERLA_BUILD_WITH_CUDA)
- cudaDeviceSynchronize();
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
#endif
+ WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
- auto time = real_c(simTimer.last());
- auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
- auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
- WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
+ double time = simTimer.max();
+ WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
+ performance.logResultOnRoot(timesteps, time);
+
+ const auto reducedTimeloopTiming = timeloopTiming.getReduced();
+ WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
+
+ WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << performance.mlupsPerProcess(timesteps, time))
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
WALBERLA_ROOT_SECTION()
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
- pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
+ pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase);
pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro);
#if defined(WALBERLA_BUILD_WITH_CUDA)
- pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", cudaEnabledMpi);
+ pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", gpuEnabledMpi);
#endif
// Call Python function to report results
pythonCallbackResults();
diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
index d0e676f5..30fe9297 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
@@ -19,7 +19,7 @@
field_type = "float64" if ctx.double_accuracy else "float32"
stencil_phase = LBStencil(Stencil.D3Q15)
- stencil_hydro = LBStencil(Stencil.D3Q27)
+ stencil_hydro = LBStencil(Stencil.D3Q19)
assert (stencil_phase.D == stencil_hydro.D)
########################
@@ -76,13 +76,13 @@
delta_equilibrium=False,
force=sp.symbols(f"F_:{stencil_phase.D}"), velocity_input=u,
weighted=True, relaxation_rates=rates,
- output={'density': C_tmp}, kernel_type='stream_pull_collide')
+ output={'density': C_tmp})
method_phase = create_lb_method(lbm_config=lbm_config_phase)
lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, compressible=False,
weighted=True, relaxation_rate=omega,
force=sp.symbols(f"F_:{stencil_hydro.D}"),
- output={'velocity': u}, kernel_type='collide_stream_push')
+ output={'velocity': u})
method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
# create the kernels for the initialization of the g and h field
@@ -137,7 +137,8 @@
sweep_params = {'block_size': sweep_block_size}
stencil_typedefs = {'Stencil_phase_T': stencil_phase,
- 'Stencil_hydro_T': stencil_hydro}
+ 'Stencil_hydro_T': stencil_hydro,
+ 'Full_Stencil_T': LBStencil(Stencil.D3Q27)}
field_typedefs = {'PdfField_phase_T': h,
'PdfField_hydro_T': g,
'VelocityField_T': u,
@@ -156,7 +157,7 @@
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
- field_swaps=[(h, h_tmp), (C, C_tmp)],
+ field_swaps=[(h, h_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
@@ -172,7 +173,7 @@
streaming_pattern='pull', target=Target.CPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
- streaming_pattern='push', target=Target.CPU)
+ streaming_pattern='pull', target=Target.CPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU)
@@ -183,7 +184,7 @@
g_updates, target=Target.GPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
- field_swaps=[(h, h_tmp), (C, C_tmp)],
+ field_swaps=[(h, h_tmp)],
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
@@ -198,7 +199,7 @@
streaming_pattern='pull', target=Target.GPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
- streaming_pattern='push', target=Target.GPU)
+ streaming_pattern='pull', target=Target.GPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU)
diff --git a/apps/benchmarks/UniformGridCPU/CMakeLists.txt b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
index 8e9c1e7a..76c40ea5 100644
--- a/apps/benchmarks/UniformGridCPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridCPU/CMakeLists.txt
@@ -3,14 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
-foreach(streaming_pattern pull push aa esotwist)
+foreach(streaming_pattern pull push aa esotwist esopull esopush)
foreach(stencil d3q19 d3q27)
- foreach (collision_setup srt trt w-mrt r-w-mrt cm r-cm k r-k entropic smagorinsky)
+ foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr)
# KBC methods only for D2Q9 and D3Q27 defined
if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
continue()
- endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
-
+ endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+ if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
+ continue()
+ endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
set(config ${stencil}_${streaming_pattern}_${collision_setup})
waLBerla_generate_target_from_python(NAME UniformGridCPUGenerated_${config}
FILE UniformGridCPU.py
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
index 4674cfae..a7eb9ecf 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
@@ -64,6 +64,9 @@ using SweepCollection_T = lbm::UniformGridCPUSweepCollection;
using blockforest::communication::UniformBufferedScheme;
+using macroFieldType = VelocityField_T::value_type;
+using pdfFieldType = PdfField_T::value_type;
+
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
@@ -87,10 +90,10 @@ int main(int argc, char** argv)
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
- auto fieldAllocator = make_shared< field::AllocateAligned< real_t, 64 > >();
+ auto fieldAllocator = make_shared< field::AllocateAligned< pdfFieldType, 64 > >();
const BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, field::fzyx, fieldAllocator);
- const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
- const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx);
+ const BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx);
+ const BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx);
const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
// Initialize velocity on cpu
@@ -235,12 +238,16 @@ int main(int argc, char** argv)
pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+ pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+ pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
+ pythonCallbackResults.data().exposeValue("vectorised", vectorised);
+ pythonCallbackResults.data().exposeValue("nontemporal", nontemporal);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
index 5a600ead..723b28f1 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
@@ -3,17 +3,12 @@
import sympy as sp
import pystencils as ps
-from pystencils.simp.subexpression_insertion import insert_zeros, insert_aliases, insert_constants,\
- insert_symbol_times_minus_one
-
from lbmpy.advanced_streaming import is_inplace
-from lbmpy.advanced_streaming.utility import streaming_patterns, get_accessor, Timestep
+from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, LBStencil, create_lb_collision_rule
from lbmpy.enums import Method, Stencil, SubgridScaleModel
-from lbmpy.fieldaccess import CollideOnlyInplaceAccessor
from lbmpy.moments import get_default_moment_set_for_stencil
-from lbmpy.updatekernels import create_stream_only_kernel
from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -21,48 +16,53 @@
omega = sp.symbols('omega')
omega_free = sp.Symbol('omega_free')
-# best configs in terms of FLOPS
options_dict = {
'srt': {
'method': Method.SRT,
'relaxation_rate': omega,
- 'compressible': True,
+ 'compressible': False,
},
'trt': {
'method': Method.TRT,
'relaxation_rate': omega,
- 'compressible': True,
+ 'compressible': False,
},
- 'r-w-mrt': {
+ 'mrt': {
'method': Method.MRT,
'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
- 'compressible': True,
+ 'compressible': False,
},
- 'w-mrt': {
+ 'mrt-overrelax': {
'method': Method.MRT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
- 'compressible': True,
+ 'compressible': False,
},
- 'r-cm': {
+ 'central': {
'method': Method.CENTRAL_MOMENT,
'relaxation_rate': omega,
'compressible': True,
},
- 'cm': {
+ 'central-overrelax': {
'method': Method.CENTRAL_MOMENT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True,
},
- 'r-k': {
- 'method': Method.CUMULANT,
+ 'cumulant': {
+ 'method': Method.MONOMIAL_CUMULANT,
'relaxation_rate': omega,
'compressible': True,
},
- 'k': {
- 'method': Method.CUMULANT,
+ 'cumulant-overrelax': {
+ 'method': Method.MONOMIAL_CUMULANT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)],
'compressible': True,
},
+ 'cumulant-K17': {
+ 'method': Method.CUMULANT,
+ 'relaxation_rate': omega,
+ 'compressible': True,
+ 'fourth_order_correction': 0.01
+ },
'entropic': {
'method': Method.TRT_KBC_N4,
'compressible': True,
@@ -75,6 +75,11 @@
'method': Method.SRT,
'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY,
'relaxation_rate': omega,
+ },
+ 'qr': {
+ 'method': Method.SRT,
+ 'subgrid_scale_model': SubgridScaleModel.QR,
+ 'relaxation_rate': omega,
}
}
@@ -83,6 +88,8 @@
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
+const bool vectorised = {vec};
+const bool nontemporal = {nt_stores};
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
@@ -90,10 +97,15 @@
with CodeGeneration() as ctx:
openmp = True if ctx.openmp else False
field_type = "float64" if ctx.double_accuracy else "float32"
- if ctx.optimize_for_localhost:
- cpu_vec = {"nontemporal": False, "assume_aligned": True}
- else:
- cpu_vec = None
+ # This base pointer specification introduces temporary pointers in the outer loop such that the inner loop
+ # only contains aligned memory addresses. Doing so, NT stores are far more effective, which gives great performance
+ # gains, especially for the pull scheme on Skylake architectures
+ base_pointer_spec = None # [['spatialInner0'], ['spatialInner1']]
+ # cpu_vec = {"instruction_set": "best", "nontemporal": False,
+ # "assume_aligned": True, 'assume_sufficient_line_padding': True}
+
+ cpu_vec = {"instruction_set": None}
+ nt_stores = False
config_tokens = ctx.config.split('_')
@@ -110,7 +122,6 @@
raise ValueError("Only D3Q27 and D3Q19 stencil are supported at the moment")
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
-
options = options_dict[collision_setup]
assert stencil.D == 3, "This application supports only three-dimensional stencils"
@@ -121,23 +132,20 @@
lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
+ # This creates a simplified version of the central moment collision operator where the bulk and shear viscosity are
+ # not separated. This is done to get a fair comparison with the monomial cumulants.
if lbm_config.method == Method.CENTRAL_MOMENT:
lbm_config = replace(lbm_config, nested_moments=get_default_moment_set_for_stencil(stencil))
if not is_inplace(streaming_pattern):
lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
- field_swaps = [(pdfs, pdfs_tmp)]
- else:
- field_swaps = []
- # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
- # is_inplace is set to False to ensure that the streaming is done with src and dst field.
- # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
- # accessor = CollideOnlyInplaceAccessor()
- accessor = get_accessor(streaming_pattern, Timestep.EVEN)
- #accessor.is_inplace = False
- field_swaps_stream_only = () if accessor.is_inplace else [(pdfs, pdfs_tmp)]
- stream_only_kernel = create_stream_only_kernel(stencil, pdfs, None if accessor.is_inplace else pdfs_tmp, accessor=accessor)
+ # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations from
+ # the compiler the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns compilers would
+ # typically remove the copy of the center PDF which results in an overestimation of the maximum performance
+ stream_only_kernel = []
+ for i in range(stencil.Q):
+ stream_only_kernel.append(ps.Assignment(pdfs(i), pdfs((i + 3) % stencil.Q)))
# LB Sweep
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
@@ -153,17 +161,25 @@
nonuniform=False, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
cpu_openmp=openmp, cpu_vectorize_info=cpu_vec,
- set_pre_collision_pdfs=False)
+ base_pointer_specification=base_pointer_spec)
# Stream only kernel
+ cpu_vec_stream = None
+ if ctx.optimize_for_localhost:
+ cpu_vec_stream = {"instruction_set": "best", "nontemporal": True,
+ "assume_aligned": True, 'assume_sufficient_line_padding': True,
+ "assume_inner_stride_one": True}
+
generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel,
- field_swaps=field_swaps_stream_only,
- target=ps.Target.CPU, cpu_openmp=openmp)
+ target=ps.Target.CPU, cpu_openmp=openmp,
+ cpu_vectorize_info=cpu_vec_stream, base_pointer_specification=[['spatialInner0'], ['spatialInner1']])
infoHeaderParams = {
'stencil': stencil_str,
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
+ 'vec': int(True if cpu_vec else False),
+ 'nt_stores': int(nt_stores),
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
index 21235056..3cfa7a91 100644
--- a/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridCPU/simulation_setup/benchmark_configs.py
@@ -4,13 +4,26 @@
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sys
import sqlite3
-from math import prod
+
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
# Number of time steps run for a workload of 128^3 per process
# if double as many cells are on the process, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 10
DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK):
@@ -35,7 +48,7 @@ class Scenario:
def __init__(self, cells_per_block=(128, 128, 128), periodic=(1, 1, 1), blocks_per_process=1,
timesteps=None, time_step_strategy="normal", omega=1.8, inner_outer_split=(1, 1, 1),
warmup_steps=2, outer_iterations=3, init_shear_flow=False, boundary_setup=False,
- vtk_write_frequency=0, remaining_time_logger_frequency=-1):
+ vtk_write_frequency=0, remaining_time_logger_frequency=-1, db_file_name=None):
if boundary_setup:
init_shear_flow = False
@@ -58,6 +71,7 @@ def __init__(self, cells_per_block=(128, 128, 128), periodic=(1, 1, 1), blocks_p
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
+ self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@@ -101,6 +115,15 @@ def results_callback(self, **kwargs):
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
+
+ if ms:
+ state = ms.MachineState(extended=False, anonymous=True)
+ state.generate() # generate subclasses
+ state.update() # read information
+ data["MachineState"] = str(state.get())
+ else:
+ print("MachineState module is not available. MachineState was not saved")
+
sequenceValuesToScalars(data)
result = data
@@ -111,8 +134,8 @@ def results_callback(self, **kwargs):
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
- checkAndUpdateSchema(result, table_name, DB_FILE)
- storeSingle(result, table_name, DB_FILE)
+ checkAndUpdateSchema(result, table_name, self.db_file_name)
+ storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
@@ -156,18 +179,38 @@ def overlap_benchmark():
scenarios.add(scenario)
-def scaling_benchmark():
- """Tests different communication overlapping strategies"""
- wlb.log_info_on_root("Running scaling benchmark")
+def weak_scaling_benchmark():
+ wlb.log_info_on_root("Running weak scaling benchmark with one block per proc")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
- cells_per_block = [(32, 32, 32), (128, 128, 128)]
- for cell_per_block in cells_per_block:
- scenarios.add(Scenario(time_step_strategy='noOverlap',
+ for t in ["noOverlap", "simpleOverlap"]:
+ scenarios.add(Scenario(time_step_strategy=t,
inner_outer_split=(1, 1, 1),
- cells_per_block=cell_per_block))
+ cells_per_block=(WeakX, WeakY, WeakZ),
+ boundary_setup=True,
+ outer_iterations=1,
+ db_file_name="weakScalingUniformGridOneBlock.sqlite3"))
+
+
+def strong_scaling_benchmark():
+ wlb.log_info_on_root("Running strong scaling benchmark with one block per proc")
+ wlb.log_info_on_root("")
+
+ scenarios = wlb.ScenarioManager()
+
+ domain_size = (StrongX, StrongY, StrongZ)
+ blocks = block_decomposition(wlb.mpi.numProcesses())
+ cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
+
+ for t in ["noOverlap", "simpleOverlap"]:
+ scenarios.add(Scenario(cells_per_block=cells_per_block,
+ time_step_strategy=t,
+ outer_iterations=1,
+ timesteps=10,
+ boundary_setup=True,
+ db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
def single_node_benchmark():
@@ -176,13 +219,11 @@ def single_node_benchmark():
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
- block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]
- for block_size in block_sizes:
- scenario = Scenario(cells_per_block=block_size,
- time_step_strategy='kernelOnly',
- outer_iterations=1,
- timesteps=num_time_steps(block_size))
- scenarios.add(scenario)
+ scenario = Scenario(cells_per_block=(128, 128, 128),
+ time_step_strategy='kernelOnly',
+ outer_iterations=1,
+ timesteps=10)
+ scenarios.add(scenario)
def validation_run():
@@ -211,5 +252,15 @@ def validation_run():
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
-validation_run()
+# validation_run()
# scaling_benchmark()
+
+
+if BENCHMARK == 0:
+ single_node_benchmark()
+elif BENCHMARK == 1:
+ weak_scaling_benchmark()
+elif BENCHMARK == 2:
+ strong_scaling_benchmark()
+else:
+ validation_run()
diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index 2607004f..25ca68ed 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -3,13 +3,16 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
-foreach(streaming_pattern pull push aa esotwist)
+foreach(streaming_pattern pull push aa esotwist esopull esopush)
foreach(stencil d3q19 d3q27)
- foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax entropic smagorinsky)
+ foreach (collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax cumulant-K17 entropic smagorinsky qr)
# KBC methods only for D2Q9 and D3Q27 defined
if (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
continue()
- endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+ endif (${collision_setup} STREQUAL "entropic" AND ${stencil} STREQUAL "d3q19")
+ if (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
+ continue()
+ endif (${collision_setup} STREQUAL "cumulant-K17" AND ${stencil} STREQUAL "d3q19")
set(config ${stencil}_${streaming_pattern}_${collision_setup})
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
FILE UniformGridGPU.py
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index 91b7a021..65e7b903 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -73,6 +73,8 @@ using SweepCollection_T = lbm::UniformGridGPUSweepCollection;
using gpu::communication::UniformGPUScheme;
+using macroFieldType = VelocityField_T::value_type;
+
int main(int argc, char** argv)
{
mpi::Environment const env(argc, argv);
@@ -103,9 +105,9 @@ int main(int argc, char** argv)
const StorageSpecification_T StorageSpec = StorageSpecification_T();
const BlockDataID pdfFieldCpuID = lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(1), field::fzyx);
- auto allocator = make_shared< gpu::HostFieldAllocator >(); // use pinned memory allocator for faster CPU-GPU memory transfers
- const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(1), allocator);
- const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(1), allocator);
+ auto allocator = make_shared< gpu::HostFieldAllocator >(); // use pinned memory allocator for faster CPU-GPU memory transfers
+ const BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", macroFieldType(0.0), field::fzyx, uint_c(1), allocator);
+ const BlockDataID densityFieldCpuID = field::addToStorage< ScalarField_T >(blocks, "density", macroFieldType(1.0), field::fzyx, uint_c(1), allocator);
const BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
// Initialize velocity on cpu
@@ -136,7 +138,7 @@ int main(int argc, char** argv)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
+ const pystencils::UniformGridGPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldGpuID);
// Boundaries
const FlagUID fluidFlagUID("Fluid");
@@ -211,7 +213,7 @@ int main(int argc, char** argv)
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
- gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
+ gpu::fieldCpy< VelocityField_T, gpu::GPUField< VelocityField_T::value_type > >(blocks, velFieldCpuID, velFieldGpuID);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
@@ -264,6 +266,13 @@ int main(int argc, char** argv)
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
+ pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
+ pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
+ pythonCallbackResults.data().exposeValue("numCores", performance.cores());
+ pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
+ pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
+ pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
+ pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index 09235c43..10c562b3 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -4,18 +4,16 @@
from dataclasses import replace
+from pystencils import Assignment
from pystencils.typing import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
-from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
-from lbmpy.enums import SubgridScaleModel
+from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil, SubgridScaleModel
from lbmpy.advanced_streaming import is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.moments import get_default_moment_set_for_stencil
-from lbmpy.updatekernels import create_stream_only_kernel
-from lbmpy.fieldaccess import *
from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
@@ -75,6 +73,12 @@
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 18)],
'compressible': True,
},
+ 'cumulant-K17': {
+ 'method': Method.CUMULANT,
+ 'relaxation_rate': omega,
+ 'compressible': True,
+ 'fourth_order_correction': 0.01
+ },
'entropic': {
'method': Method.TRT_KBC_N4,
'compressible': True,
@@ -87,6 +91,11 @@
'method': Method.SRT,
'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY,
'relaxation_rate': omega,
+ },
+ 'qr': {
+ 'method': Method.SRT,
+ 'subgrid_scale_model': SubgridScaleModel.QR,
+ 'relaxation_rate': omega,
}
}
@@ -102,7 +111,8 @@
optimize = True
with CodeGeneration() as ctx:
- field_type = "float64" if ctx.double_accuracy else "float32"
+ pdf_data_type = "float64"
+ field_data_type = "float64"
config_tokens = ctx.config.split('_')
assert len(config_tokens) >= 3
@@ -125,8 +135,8 @@
options = options_dict[collision_setup]
assert stencil.D == 3, "This application supports only three-dimensional stencils"
- pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
- density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
+ pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {pdf_data_type}[3D]", layout='fzyx')
+ density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_data_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
@@ -141,13 +151,12 @@
else:
field_swaps = []
- # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
- # is_inplace is set to False to ensure that the streaming is done with src and dst field.
- # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
- accessor = CollideOnlyInplaceAccessor()
- accessor.is_inplace = False
- field_swaps_stream_only = [(pdfs, pdfs_tmp)]
- stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor)
+ # This is a microbenchmark for testing how fast Q PDFs can be updated per cell. To avoid optimisations from
+ # the compiler the PDFs are shuffled inside a cell. Otherwise, for common streaming patterns compilers would
+ # typically remove the copy of the center PDF which results in an overestimation of the maximum performance
+ stream_only_kernel = []
+ for i in range(stencil.Q):
+ stream_only_kernel.append(Assignment(pdfs(i), pdfs((i + 3) % stencil.Q)))
# LB Sweep
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
@@ -159,9 +168,10 @@
lb_method = collision_rule.method
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
- boundary_object=NoSlip())
+ boundary_object=NoSlip(), field_data_type=pdf_data_type)
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
- boundary_object=UBB([0.05, 0, 0], data_type=field_type))
+ boundary_object=UBB([0.05, 0, 0], data_type=field_data_type),
+ field_data_type=pdf_data_type)
generate_lbm_package(ctx, name="UniformGridGPU",
collision_rule=collision_rule,
@@ -169,12 +179,12 @@
nonuniform=False, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
- max_threads=max_threads, set_pre_collision_pdfs=False)
+ data_type=field_data_type, pdfs_data_type=pdf_data_type,
+ max_threads=max_threads)
# Stream only kernel
- vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')]
- generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
- gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU,
+ generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel,
+ gpu_indexing_params={'block_size': (128, 1, 1)}, target=ps.Target.GPU,
max_threads=max_threads)
infoHeaderParams = {
diff --git a/apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py b/apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py
similarity index 100%
rename from apps/benchmarks/UniformGridCPU/simulation_setup/PizDaintJobScript.py
rename to apps/benchmarks/UniformGridGPU/old_ideas/PizDaintJobScript.py
diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
index 74be4378..346a596e 100755
--- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
+++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py
@@ -6,11 +6,25 @@
import sqlite3
from math import prod
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
+
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 1000
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
+BENCHMARK = int(os.environ.get('BENCHMARK', 0))
+
+WeakX = int(os.environ.get('WeakX', 128))
+WeakY = int(os.environ.get('WeakY', 128))
+WeakZ = int(os.environ.get('WeakZ', 128))
+
+StrongX = int(os.environ.get('StrongX', 128))
+StrongY = int(os.environ.get('StrongY', 128))
+StrongZ = int(os.environ.get('StrongZ', 128))
BASE_CONFIG = {
'DomainSetup': {
@@ -39,6 +53,8 @@
def num_time_steps(block_size, time_steps_for_128_block=200):
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
+ if time_steps < 10:
+ time_steps = 10
return int(time_steps)
@@ -61,13 +77,13 @@ def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blo
inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
init_shear_flow=False, boundary_setup=False,
vtk_write_frequency=0, remaining_time_logger_frequency=-1,
- additional_info=None):
+ additional_info=None, blocks=None, db_file_name=None):
if boundary_setup:
init_shear_flow = False
periodic = (0, 0, 0)
- self.blocks = block_decomposition(wlb.mpi.numProcesses())
+ self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
self.cells_per_block = cells_per_block
self.periodic = periodic
@@ -85,6 +101,7 @@ def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blo
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
+ self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
self.additional_info = additional_info
@@ -97,7 +114,6 @@ def config(self, print_dict=True):
'blocks': self.blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
- 'oneBlockPerProcess': True
},
'Parameters': {
'omega': self.omega,
@@ -115,7 +131,6 @@ def config(self, print_dict=True):
'Logging': {
'logLevel': 'info', # info progress detail tracing
}
-
}
if self.boundary_setup:
config_dict["Boundaries"] = ldc_setup
@@ -140,6 +155,15 @@ def results_callback(self, **kwargs):
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
+
+ if ms:
+ state = ms.MachineState(extended=False, anonymous=True)
+ state.generate() # generate subclasses
+ state.update() # read information
+ data["MachineState"] = str(state.get())
+ else:
+ print("MachineState module is not available. MachineState was not saved")
+
sequenceValuesToScalars(data)
result = data
@@ -150,8 +174,8 @@ def results_callback(self, **kwargs):
table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error
for num_try in range(num_tries):
try:
- checkAndUpdateSchema(result, table_name, DB_FILE)
- storeSingle(result, table_name, DB_FILE)
+ checkAndUpdateSchema(result, table_name, self.db_file_name)
+ storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
@@ -200,12 +224,70 @@ def overlap_benchmark():
scenarios.add(scenario)
+def no_overlap_scaling(cuda_enabled_mpi=False):
+ """Runs a scaling benchmark with the 'noOverlap' time step strategy only (no communication hiding)"""
+ wlb.log_info_on_root("Running scaling benchmark without communication hiding")
+ wlb.log_info_on_root("")
+
+ scenarios = wlb.ScenarioManager()
+ # no overlap
+ scenarios.add(Scenario(cells_per_block=(256, 256, 256),
+ cuda_blocks=(128, 1, 1),
+ time_step_strategy='noOverlap',
+ inner_outer_split=(1, 1, 1),
+ cuda_enabled_mpi=cuda_enabled_mpi,
+ outer_iterations=1))
+
+
+def weak_scaling_overlap(cuda_enabled_mpi=False):
+ """Tests different communication overlapping strategies"""
+ wlb.log_info_on_root("Running scaling benchmark with communication hiding")
+ wlb.log_info_on_root("")
+
+ scenarios = wlb.ScenarioManager()
+
+ # overlap
+ for t in ["noOverlap", "simpleOverlap"]:
+ scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
+ cuda_blocks=(128, 1, 1),
+ time_step_strategy=t,
+ inner_outer_split=(8, 8, 8),
+ cuda_enabled_mpi=cuda_enabled_mpi,
+ outer_iterations=1,
+ boundary_setup=True,
+ db_file_name="weakScalingUniformGrid.sqlite3"))
+
+
+def strong_scaling_overlap(cuda_enabled_mpi=False):
+ wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
+ wlb.log_info_on_root("")
+
+ scenarios = wlb.ScenarioManager()
+
+ domain_size = (StrongX, StrongY, StrongZ)
+ blocks = block_decomposition(wlb.mpi.numProcesses())
+ cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
+
+ # overlap
+ for t in ["noOverlap", "simpleOverlap"]:
+ scenarios.add(Scenario(cells_per_block=cells_per_block,
+ cuda_blocks=(128, 1, 1),
+ time_step_strategy=t,
+ inner_outer_split=(1, 1, 1),
+ cuda_enabled_mpi=cuda_enabled_mpi,
+ outer_iterations=1,
+ timesteps=50,
+ blocks=blocks,
+ boundary_setup=True,
+ db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
+
+
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
- gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+ gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
@@ -214,12 +296,8 @@ def single_gpu_benchmark():
additional_info['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
- block_sizes = [(i, i, i) for i in (32, 64, 128, 256)]
- cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
- (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
- (32, 4, 1), (64, 4, 1), (128, 4, 1),
- (32, 8, 1), (64, 8, 1),
- (32, 16, 1)]
+ block_sizes = [(i, i, i) for i in (128, 256, 320)]
+ cuda_blocks = [(128, 1, 1), ]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
# cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
@@ -266,4 +344,14 @@ def validation_run():
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
-validation_run()
+# validation_run()
+
+if BENCHMARK == 0:
+ single_gpu_benchmark()
+elif BENCHMARK == 1:
+ weak_scaling_overlap(True)
+elif BENCHMARK == 2:
+ strong_scaling_overlap(True)
+else:
+ validation_run()
+
diff --git a/apps/showcases/Thermocapillary/benchmark.py b/apps/showcases/Thermocapillary/benchmark.py
index 6493eda5..a08e2688 100755
--- a/apps/showcases/Thermocapillary/benchmark.py
+++ b/apps/showcases/Thermocapillary/benchmark.py
@@ -6,6 +6,11 @@
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+try:
+ import machinestate as ms
+except ImportError:
+ ms = None
+
def num_time_steps(block_size, time_steps_for_256_block=50):
# Number of time steps run for a workload of 256^3 cells per process
@@ -137,6 +142,14 @@ def write_benchmark_results(self, **kwargs):
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
+ if ms:
+ state = ms.MachineState(extended=False, anonymous=True)
+ state.generate() # generate subclasses
+ state.update() # read information
+ data["MachineState"] = str(state.get())
+ else:
+ print("MachineState module is not available. MachineState was not saved")
+
sequenceValuesToScalars(data)
csv_file = f"thermocapillary_benchmark.csv"
diff --git a/python/lbmpy_walberla/packing_kernels.py b/python/lbmpy_walberla/packing_kernels.py
index 8a872803..862a6cda 100644
--- a/python/lbmpy_walberla/packing_kernels.py
+++ b/python/lbmpy_walberla/packing_kernels.py
@@ -7,8 +7,10 @@
from jinja2 import Environment, PackageLoader, StrictUndefined
from pystencils import Assignment, CreateKernelConfig, create_kernel, Field, FieldType, fields, Target
+from pystencils.astnodes import LoopOverCoordinate
+from pystencils.integer_functions import int_div
from pystencils.stencil import offset_to_direction_string
-from pystencils.typing import TypedSymbol
+from pystencils.typing import TypedSymbol, BasicType, PointerType, FieldPointerSymbol
from pystencils.stencil import inverse_direction
from pystencils.bit_masks import flag_cond
@@ -18,7 +20,7 @@
from lbmpy.stencils import LBStencil
from pystencils_walberla.cmake_integration import CodeGenerationContext
-from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode
+from pystencils_walberla.kernel_selection import KernelFamily, KernelCallNode, SwitchNode, AbortNode
from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
from pystencils_walberla.utility import config_from_context
@@ -101,6 +103,23 @@ def __init__(self, stencil, streaming_pattern, class_name, config: CreateKernelC
self.accessors = [get_accessor(streaming_pattern, t) for t in get_timesteps(streaming_pattern)]
self.mask_field = fields(f'mask : uint32 [{self.dim}D]', layout=src_field.layout)
+ self.block_wise = True
+ if not self.inplace or not self.config.target == Target.GPU:
+ self.block_wise = False
+
+ self.index = TypedSymbol("index", dtype=BasicType(np.int64))
+ self.index_shape = TypedSymbol("_size_0", dtype=BasicType(np.int64))
+ self.src_ptr_type = PointerType(self.src_field.dtype, const=True, restrict=True, double_pointer=True)
+ self.src_ptr = FieldPointerSymbol(self.src_field.name, self.src_field.dtype, const=True)
+ self.dst_ptr_type = PointerType(self.dst_field.dtype, const=False, restrict=True, double_pointer=True)
+ self.dst_ptr = FieldPointerSymbol(self.dst_field.name, self.dst_field.dtype, const=False)
+
+ self.data_src = TypedSymbol(f"_data_{self.src_field.name}_dp", dtype=self.src_ptr_type)
+ self.data_dst = TypedSymbol(f"_data_{self.dst_field.name}_dp", dtype=self.dst_ptr_type)
+
+ self.f = sp.IndexedBase(self.data_src, shape=self.index_shape)
+ self.d = sp.IndexedBase(self.data_dst, shape=self.index_shape)
+
def create_uniform_kernel_families(self, kernels_dict=None):
kernels = dict() if kernels_dict is None else kernels_dict
@@ -115,6 +134,8 @@ def create_uniform_kernel_families(self, kernels_dict=None):
def create_nonuniform_kernel_families(self, kernels_dict=None):
kernels = dict() if kernels_dict is None else kernels_dict
+ kernels['localCopyRedistribute'] = self.get_local_copy_redistribute_kernel_family()
+ kernels['localPartialCoalescence'] = self.get_local_copy_partial_coalescence_kernel_family()
kernels['unpackRedistribute'] = self.get_unpack_redistribute_kernel_family()
kernels['packPartialCoalescence'] = self.get_pack_partial_coalescence_kernel_family()
kernels['zeroCoalescenceRegion'] = self.get_zero_coalescence_region_kernel_family()
@@ -231,7 +252,10 @@ def get_local_copy_direction_ast(self, comm_dir, timestep):
dir_string = offset_to_direction_string(comm_dir)
streaming_dirs = self.get_streaming_dirs(comm_dir)
src, dst = self._stream_out_accs(timestep)
- assignments = []
+ assignments = list()
+ if self.block_wise:
+ assignments.append(Assignment(self.src_ptr, self.f[self.index]))
+ assignments.append(Assignment(self.dst_ptr, self.d[self.index]))
dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
if len(dir_indices) == 0:
return None
@@ -283,15 +307,59 @@ def get_unpack_redistribute_ast(self, comm_dir, timestep):
return create_kernel(assignments, config=config)
def get_unpack_redistribute_kernel_family(self):
- return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast)
+ return self._construct_directionwise_kernel_family(self.get_unpack_redistribute_ast,
+ exclude_time_step=Timestep.EVEN)  # the EVEN subtree becomes an AbortNode
def get_local_copy_redistribute_ast(self, comm_dir, timestep):
- # TODO
- raise NotImplementedError()
+ assert not all(d == 0 for d in comm_dir)  # at least one component of comm_dir must be non-zero
+ ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]
+
+ dir_string = offset_to_direction_string(comm_dir)
+ streaming_dirs = self.get_streaming_dirs(inverse_direction(comm_dir))  # looked up for the inverse direction
+ dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+ if len(dir_indices) == 0:
+ return None  # no stencil entries for this comm_dir, so no AST is built
+
+ # for inplace streaming the dst (fine grid) must always be on odd state
+ dst_timestep = Timestep.ODD if self.inplace else Timestep.BOTH
+
+ _, dst = self._stream_out_accs(dst_timestep)  # only the dst accessors of this call are kept
+ src, _ = self._stream_out_accs(timestep)  # only the src accessors of this call are kept
+
+ src_abs = self.src_field.new_field_with_different_name(self.src_field.name)  # same name as src_field
+ src_abs.field_type = FieldType.CUSTOM  # src_abs is only used through absolute_access below
+
+ orthos = self.orthogonal_principals(comm_dir)
+ sub_dirs = self.contained_principals(comm_dir)
+ orthogonal_combinations = self.linear_combinations(orthos)
+ subdir_combinations = self.linear_combinations_nozero(sub_dirs)
+ second_gl_dirs = [o + s for o, s in product(orthogonal_combinations, subdir_combinations)]
+ negative_dir_correction = np.array([(1 if d == -1 else 0) for d in comm_dir])  # 1 where comm_dir is -1
+ assignments = []
+ for offset in orthogonal_combinations:  # first pass: all values_per_cell entries are written
+ o = offset + negative_dir_correction
+ for d in range(self.values_per_cell):
+ field_acc = dst[d].get_shifted(*o)
+ src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)]  # this o is comprehension-local
+ assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, ))))
+
+ for offset in second_gl_dirs:  # second pass: only the entries listed in dir_indices are written
+ o = offset + negative_dir_correction
+ for d in dir_indices:
+ field_acc = dst[d].get_shifted(*o)
+ src_access = [int_div(ctr[i], 2) + o for i, o in enumerate(src[d].offsets)]  # this o is comprehension-local
+ assignments.append(Assignment(field_acc, src_abs.absolute_access(src_access, (d, ))))
+
+ function_name = f'localCopyRedistribute_{dir_string}' + timestep_suffix(timestep)
+ iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))  # step 2 along every dimension
+ config = CreateKernelConfig(function_name=function_name, iteration_slice=iteration_slice,
+ data_type=self.data_type, ghost_layers=0, allow_double_writes=True,
+ cpu_openmp=self.config.cpu_openmp, target=self.config.target)
+
+ return create_kernel(assignments, config=config)
def get_local_copy_redistribute_kernel_family(self):
- # TODO
- raise NotImplementedError()
+ return self._construct_directionwise_kernel_family(self.get_local_copy_redistribute_ast)  # no timestep excluded
# --------------------------- Pack / Unpack / LocalCopy Fine to Coarse ---------------------------------------------
@@ -322,7 +390,8 @@ def get_pack_partial_coalescence_ast(self, comm_dir, timestep):
return ast
def get_pack_partial_coalescence_kernel_family(self):
- return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast)
+ return self._construct_directionwise_kernel_family(self.get_pack_partial_coalescence_ast,
+ exclude_time_step=Timestep.ODD)  # the ODD subtree becomes an AbortNode
def get_unpack_coalescence_ast(self, comm_dir, timestep):
config = replace(self.config, ghost_layers=0)
@@ -370,12 +439,53 @@ def get_zero_coalescence_region_ast(self, comm_dir, timestep):
def get_zero_coalescence_region_kernel_family(self):
return self._construct_directionwise_kernel_family(self.get_zero_coalescence_region_ast)
- # TODO
def get_local_copy_partial_coalescence_ast(self, comm_dir, timestep):
- raise NotImplementedError()
+ assert not all(d == 0 for d in comm_dir)  # at least one component of comm_dir must be non-zero
+ ctr = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(self.stencil.D)]
+
+ dir_string = offset_to_direction_string(comm_dir)
+ streaming_dirs = self.get_streaming_dirs(comm_dir)
+ dir_indices = sorted(self.stencil.index(d) for d in streaming_dirs)
+
+ if len(dir_indices) == 0:
+ return None  # no stencil entries for this comm_dir, so no AST is built
+ buffer = sp.symbols(f"b_:{self.values_per_cell}")  # symbols b_0 ... b_(values_per_cell - 1)
+
+ # for inplace streaming the src (fine grid) must always be on even state
+ src_timestep = Timestep.ODD if self.inplace else Timestep.BOTH  # NOTE(review): comment says even, code picks ODD
+
+ src, _ = self._stream_in_accs(src_timestep)  # only the src accessors of this call are kept
+ _, dst = self._stream_in_accs(timestep.next())  # only the dst accessors of this call are kept
+ mask = self.mask_field
+
+ dst_abs = self.dst_field.new_field_with_different_name(self.dst_field.name)  # same name as dst_field
+ dst_abs.field_type = FieldType.CUSTOM  # dst_abs is only used through absolute_access below
+
+ coalescence_factor = sp.Rational(1, 2 ** self.dim)  # exact rational 1 / 2**dim
+
+ offsets = list(product(*((0, 1) for _ in comm_dir)))  # every 0/1 combination, one slot per comm_dir component
+ assignments = []
+ for i, d in enumerate(dir_indices):  # buffer[i] = sum of flag_cond terms over all offsets
+ acc = 0
+ for o in offsets:
+ acc += flag_cond(d, mask[o], src[d].get_shifted(*o))
+ assignments.append(Assignment(buffer[i], acc))
+
+ for i, d in enumerate(dir_indices):  # dst += coalescence_factor * buffer[i]
+ index = dst[d].index
+ dst_access = [int_div(ctr[i], 2) + o for i, o in enumerate(dst[d].offsets)]  # i, o here are comprehension-local
+ assignments.append(Assignment(dst_abs.absolute_access(dst_access, index),
+ dst_abs.absolute_access(dst_access, index) + coalescence_factor * buffer[i]))
+
+ iteration_slice = tuple(slice(None, None, 2) for _ in range(self.dim))  # step 2 along every dimension
+ config = replace(self.config, iteration_slice=iteration_slice, ghost_layers=0)
+
+ ast = create_kernel(assignments, config=config)
+ ast.function_name = f'localPartialCoalescence_{dir_string}' + timestep_suffix(timestep)
+ return ast
def get_local_copy_partial_coalescence_kernel_family(self):
- raise NotImplementedError()
+ return self._construct_directionwise_kernel_family(self.get_local_copy_partial_coalescence_ast)  # no timestep excluded
# ------------------------------------------ Utility ---------------------------------------------------------------
@@ -425,7 +535,7 @@ def linear_combinations_nozero(self, vectors):
# --------------------------- Private Members ----------------------------------------------------------------------
- def _construct_directionwise_kernel_family(self, create_ast_callback):
+ def _construct_directionwise_kernel_family(self, create_ast_callback, exclude_time_step=None):
subtrees = []
direction_symbol = TypedSymbol('dir', dtype='stencil::Direction')
for t in get_timesteps(self.streaming_pattern):
@@ -439,7 +549,10 @@ def _construct_directionwise_kernel_family(self, create_ast_callback):
continue
kernel_call = KernelCallNode(ast)
cases_dict[f"stencil::{dir_string}"] = kernel_call
- subtrees.append(SwitchNode(direction_symbol, cases_dict))
+ if exclude_time_step is not None and t == exclude_time_step:
+ subtrees.append(AbortNode("This function can not be called! Please contact the waLBerla team"))
+ else:
+ subtrees.append(SwitchNode(direction_symbol, cases_dict))
if not self.inplace:
tree = subtrees[0]
diff --git a/python/lbmpy_walberla/storage_specification.py b/python/lbmpy_walberla/storage_specification.py
index 60fd96d2..ae56ff73 100644
--- a/python/lbmpy_walberla/storage_specification.py
+++ b/python/lbmpy_walberla/storage_specification.py
@@ -113,8 +113,8 @@ def generate_lbm_storage_specification(generation_context: CodeGenerationContext
'kernels': kernels,
'direction_sizes': cg.get_direction_sizes(),
'src_field': cg.src_field,
- 'dst_field': cg.dst_field
-
+ 'dst_field': cg.dst_field,
+ 'block_wise': cg.block_wise
}
if nonuniform:
jinja_context['mask_field'] = cg.mask_field
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
index bc8bdda4..164dd94d 100644
--- a/python/lbmpy_walberla/sweep_collection.py
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -1,13 +1,17 @@
from dataclasses import replace
from typing import Dict
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
import sympy as sp
import numpy as np
-from pystencils import Target, create_kernel
+from pystencils import Target, create_kernel, Assignment
+from pystencils.bit_masks import flag_cond
from pystencils.config import CreateKernelConfig
-from pystencils.field import Field
+from pystencils.field import Field, fields
from pystencils.simp import add_subexpressions_for_field_reads
+from pystencils.typing import BasicType, PointerType, FieldPointerSymbol, TypedSymbol, CastFunc
from lbmpy.advanced_streaming import is_inplace, get_accessor, Timestep
from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
@@ -17,8 +21,8 @@
from pystencils_walberla.kernel_selection import KernelCallNode, KernelFamily
from pystencils_walberla.utility import config_from_context
-from pystencils_walberla import generate_sweep_collection
-from lbmpy_walberla.utility import create_pdf_field
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+from lbmpy_walberla.utility import create_pdf_field, timestep_suffix
from .alternating_sweeps import EvenIntegerCondition
from .function_generator import kernel_family_function_generator
@@ -59,7 +63,6 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
field_layout=lbm_optimisation.field_layout)
config = replace(config, ghost_layers=0)
-
function_generators = []
def family(name):
@@ -68,6 +71,13 @@ def family(name):
def generator(name, kernel_family):
return kernel_family_function_generator(name, kernel_family, namespace='lbm', max_threads=max_threads)
+ all_fields = collision_rule.bound_fields.union(collision_rule.free_fields)
+ all_fields.update({src_field, dst_field})
+ all_fields = list(sorted(all_fields, key=lambda e: str(e)))
+
+ bw_stream_collide = block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config)
+ bw_stream = block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config)
+
function_generators.append(generator('streamCollide', family("streamCollide")))
function_generators.append(generator('collide', family("collide")))
function_generators.append(generator('stream', family("stream")))
@@ -87,7 +97,65 @@ def generator(name, kernel_family):
namespace='lbm', max_threads=max_threads)
function_generators.append(getter_generator)
- generate_sweep_collection(ctx, class_name, function_generators, refinement_scaling)
+ contexts_function_generators = list()
+ for fct in function_generators:
+ contexts_function_generators.append(fct())
+
+ namespaces = set([context['namespace'] for context in contexts_function_generators])
+ assert len(namespaces) == 1, "All function_generators must output the same namespace!"
+ namespace = namespaces.pop()
+
+ headers = set()
+ for context in contexts_function_generators:
+ for header in context['interface_spec'].headers:
+ headers.add(header)
+ for header in context['kernel'].get_headers():
+ headers.add(header)
+
+ kernel_list = list()
+ for context in contexts_function_generators:
+ kernel_list.append(context['kernel'])
+
+ kernels = list()
+ for context in contexts_function_generators:
+ kernels.append({
+ 'kernel': context['kernel'],
+ 'function_name': context['function_name'],
+ 'ghost_layers_to_include': 'ghost_layers',
+ 'field': context['field'],
+ 'max_threads': context['max_threads']
+ })
+
+ target = kernels[0]['kernel'].target
+
+ jinja_context = {
+ 'block_stream_collide': bw_stream_collide,
+ 'block_stream': bw_stream,
+ 'all_fields': all_fields,
+ 'pdf_field': src_field,
+ 'kernel_list': kernel_list,
+ 'kernels': kernels,
+ 'namespace': namespace,
+ 'class_name': class_name,
+ 'headers': headers,
+ 'target': target.name.lower(),
+ 'is_gpu': target == Target.GPU,
+ 'parameter_scaling': refinement_scaling,
+ 'stencil_name': lbm_config.stencil.name,
+ 'D': lbm_config.stencil.D,
+ 'Q': lbm_config.stencil.Q,
+ 'inplace': is_inplace(lbm_config.streaming_pattern)
+ }
+
+ env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined)
+ add_pystencils_filters_to_jinja_env(env)
+
+ header = env.get_template("LBMSweepCollection.tmpl.h").render(**jinja_context)
+ source = env.get_template("LBMSweepCollection.tmpl.cpp").render(**jinja_context)
+
+ source_extension = "cu" if target == Target.GPU and ctx.cuda else "cpp"
+ ctx.write_file(f"{class_name}.h", header)
+ ctx.write_file(f"{class_name}.{source_extension}", source)
class RefinementScaling:
@@ -175,7 +243,7 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
default_dtype = config.data_type.default_factory()
- get_timestep = {"field_name": pdfs.name, "function": "getTimestep"}
+ get_timestep = {"field_name": pdfs.name, "function": "getTimestepPlusOne"}
temporary_fields = ()
field_swaps = ()
@@ -252,3 +320,104 @@ def get_getter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
family = KernelFamily(node, class_name, temporary_fields=temporary_fields, field_swaps=field_swaps)
return family
+
+
+def _block_wise_family_context(class_name, collision_rule, lbm_config, src_field, dst_field, config,
+                               stream_only, function_name):
+    """Assemble the template context shared by the two block-wise kernel families.
+
+    The EVEN and ODD kernels are generated with ``create_block_wise_ast`` and wrapped in an
+    ``EvenIntegerCondition`` on the ``uint8`` parameter ``timestep``.
+
+    Args:
+        class_name: name handed to the ``KernelFamily``
+        collision_rule, src_field, dst_field, config: forwarded unchanged to ``create_block_wise_ast``
+        lbm_config: provides ``streaming_pattern``; also forwarded to ``create_block_wise_ast``
+        stream_only: forwarded to ``create_block_wise_ast`` to pick the stream-only kernel
+        function_name: value stored under ``'function_name'`` in the returned context
+
+    Returns:
+        ``None`` if the streaming pattern is not in-place, otherwise a dict with the keys
+        ``kernel``, ``all_fields``, ``namespace``, ``function_name``, ``indexed_to_field_name``
+        and ``max_threads``.
+    """
+    if not is_inplace(lbm_config.streaming_pattern):
+        return None
+
+    # the field list is taken from the EVEN kernel only; the one returned for ODD is discarded
+    ast_even, all_fields = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                                 lbm_config, Timestep.EVEN, config, stream_only)
+    even_call = KernelCallNode(ast_even)
+    ast_odd, _ = create_block_wise_ast(collision_rule, src_field, dst_field,
+                                       lbm_config, Timestep.ODD, config, stream_only)
+    odd_call = KernelCallNode(ast_odd)
+    tree = EvenIntegerCondition('timestep', even_call, odd_call, parameter_dtype=np.uint8)
+
+    return {
+        'kernel': KernelFamily(tree, class_name),
+        'all_fields': all_fields,
+        'namespace': 'lbm',
+        'function_name': function_name,
+        # field name -> name of the matching ``_data_<field>_dp`` pointer-array variable
+        'indexed_to_field_name': {field.name: f"_data_{field.name}_dp" for field in all_fields},
+        'max_threads': None
+    }
+
+
+def block_wise_stream_collide(class_name, collision_rule, lbm_config, src_field, dst_field, config):
+    """Return the template context of the ``blockStreamCollide`` kernel family.
+
+    Yields ``None`` when the streaming pattern of ``lbm_config`` is not in-place; see
+    ``_block_wise_family_context`` for the layout of the returned dict.
+    """
+    return _block_wise_family_context(class_name, collision_rule, lbm_config, src_field, dst_field, config,
+                                      stream_only=False, function_name='blockStreamCollide')
+
+
+def block_wise_stream(class_name, collision_rule, lbm_config, src_field, dst_field, config):
+    """Return the template context of the ``blockStream`` (stream-only) kernel family.
+
+    Yields ``None`` when the streaming pattern of ``lbm_config`` is not in-place; see
+    ``_block_wise_family_context`` for the layout of the returned dict.
+    """
+    return _block_wise_family_context(class_name, collision_rule, lbm_config, src_field, dst_field, config,
+                                      stream_only=True, function_name='blockStream')
+
+
+def create_block_wise_ast(collision_rule, src_field, dst_field, lbm_config, timestep, config, stream_only):
+    """Create the kernel AST of a block-wise stream or stream-collide step for one ``timestep``.
+
+    In front of the update rule one assignment per field is placed that sets the field pointer to
+    entry ``index`` of the double-pointer argument ``_data_<field name>_dp``. Returns the tuple
+    ``(ast, all_fields)`` with ``all_fields`` being the fields of the update rule sorted by name.
+    """
+    dtype = config.data_type.default_factory()
+    kernel_config = replace(config, gpu_indexing_params={})
+    accessor = get_accessor(lbm_config.streaming_pattern, timestep)
+
+    if stream_only:
+        rule = create_stream_only_kernel(lbm_config.stencil, src_field, dst_field, accessor)
+        base_name = "kernel_BlockStream"
+    else:
+        rule = create_lbm_kernel(collision_rule, src_field, dst_field, accessor, data_type=dtype)
+        base_name = "kernel_BlockStreamCollide"
+
+    const_fields = rule.free_fields
+    all_fields = sorted(rule.bound_fields.union(const_fields), key=lambda fld: fld.name)
+
+    block_index = TypedSymbol("index", dtype=BasicType(np.int64))
+    pointer_count = TypedSymbol("_size_0", dtype=BasicType(np.int64))
+
+    def pointer_assignment(fld):
+        # free fields of the update rule are accessed through const pointers
+        is_const = fld in const_fields
+        dp_type = PointerType(fld.dtype, const=is_const, restrict=True, double_pointer=True)
+        field_ptr = FieldPointerSymbol(fld.name, fld.dtype, const=is_const)
+        dp_array = sp.IndexedBase(TypedSymbol(f"_data_{fld.name}_dp", dtype=dp_type), shape=pointer_count)
+        return Assignment(field_ptr, dp_array[block_index])
+
+    assignments = [pointer_assignment(fld) for fld in all_fields] + rule.all_assignments
+    ast = create_kernel(assignments, config=kernel_config)
+    ast.function_name = base_name + timestep_suffix(timestep)
+    ast.assumed_inner_stride_one = kernel_config.cpu_vectorize_info['assume_inner_stride_one']
+    return ast, all_fields
diff --git a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
index 453f78e0..4e19d069 100644
--- a/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
+++ b/python/lbmpy_walberla/templates/BoundaryCollection.tmpl.h
@@ -41,12 +41,12 @@ class {{class_name}}
enum Type { ALL = 0, INNER = 1, OUTER = 2 };
- {{class_name}}( {{- ["const shared_ptr & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
+ {{class_name}}( {{- ["const shared_ptr & blocks", "BlockDataID flagID_", "BlockDataID pdfsID_", "FlagUID domainUID_", [kernel_list|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs'])], additional_constructor_arguments] | type_identifier_list -}} )
: blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
{
{% for object_name, boundary_class, kernel, additional_data_handler in zip(object_names, boundary_classes, kernel_list, additional_data_handlers) -%}
- {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
+ {{object_name}} = std::make_shared< {{boundary_class}} >({{- ["blocks", "pdfsID", [kernel|generate_function_collection_call(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize', 'pdfs', 'timestep', 'gpuStream'], use_field_ids=True)], additional_data_handler.constructor_argument_name] | type_identifier_list -}});
{% endfor %}
{% for object_name, flag_uid in zip(object_names, flag_uids) -%}
diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp
new file mode 100644
index 00000000..a691af5d
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.cpp
@@ -0,0 +1,156 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\author pystencils
+//======================================================================================================================
+#include "{{class_name}}.h"
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wunused-variable"
+# pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable : 1599 )
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 191
+#else
+#pragma diag_suppress 191
+#endif
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace {{namespace}} {
+
+{%if block_stream_collide -%}
+{{block_stream_collide['kernel']|generate_definitions(target, block_stream_collide['max_threads'])}}
+{{block_stream['kernel']|generate_definitions(target, block_stream['max_threads'])}}
+{%endif%}
+
+{% for kernel in kernels %}
+{{kernel['kernel']|generate_definitions(target, kernel['max_threads'])}}
+{% endfor %}
+
+void {{class_name}}::blockStreamCollide({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}})
+{ // the body is only emitted when the generator supplied a block_stream_collide context
+ {%if block_stream_collide -%}
+
+ {%if target is equalto 'gpu' -%}
+ dim3 _grid = grid_[level]; // launch configuration stored per refinement level
+ dim3 _block = block_[level];
+
+ {%- for field in block_stream_collide['all_fields'] %}
+ {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}PointersGPU[level];
+ {%- endfor %}
+
+ {% else %}
+
+ {%- for field in block_stream_collide['all_fields'] %}
+ {{field.dtype.c_name}} ** {{block_stream_collide['indexed_to_field_name'][field.name]}} = {{field.name}}Pointers[level].data();
+ {%- endfor %}
+
+ {%- endif %}
+ const int64_t _size_0 = size_0[level]; // number of block pointers collected for this level
+ int64_t _size_{{block_stream_collide['all_fields'][0].name}}_0 = size_1; // x size read from the first block's field
+ int64_t _size_{{block_stream_collide['all_fields'][0].name}}_1 = size_2; // y size read from the first block's field
+ int64_t _size_{{block_stream_collide['all_fields'][0].name}}_2 = size_3; // z size read from the first block's field
+
+ {{block_stream_collide['kernel']|generate_field_strides()|indent(3)}}
+ {{block_stream_collide['kernel']|generate_refs_for_kernel_parameters(prefix="this->", parameters_to_ignore=["_size_0"], ignore_fields=True, parameter_registration=parameter_scaling, level_known=True)|indent(3)}}
+ {{block_stream_collide['kernel']|generate_call(stream='stream', plain_kernel_call=True)|indent(3)}}
+
+ {%endif%}
+}
+
+void {{class_name}}::ghostLayerPropagation({{- ["[[maybe_unused]] uint_t level", "[[maybe_unused]] uint8_t timestep", ["[[maybe_unused]] gpuStream_t stream"] if target == 'gpu' else []] | type_identifier_list -}})
+{ // the body is only emitted when the generator supplied a block_stream_collide context
+ {%if block_stream_collide -%}
+
+ {{block_stream['kernel']|generate_field_strides()|indent(3)}}
+
+ {%if target is equalto 'gpu' -%}
+ auto parallelSection_ = parallelStreams_.parallelSection( stream );
+ for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ // entries are keyed by ghost-region size
+ if(it->second.empty()){ continue;} // no pointers stored for this size
+
+ int64_t _size_0 = int64_c(it->second.size()); // number of pointers stored for this size
+ int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first); // x size of the ghost region
+ int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first); // y size of the ghost region
+ int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first); // z size of the ghost region
+
+ {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = glPropagationPDFsGPU[level][it->first];
+ dim3 _grid = glPropagationGrid_[level][it->first];
+ dim3 _block = glPropagationBlock_[level][it->first];
+ parallelSection_.run([&]( auto s ) {
+ {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(9)}}
+ });
+ }
+
+ {% else %}
+
+ for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){ // entries are keyed by ghost-region size
+ if(it->second.empty()){ continue;} // no pointers stored for this size
+
+ int64_t _size_0 = int64_c(it->second.size()); // number of pointers stored for this size
+ int64_t _size_{{pdf_field.name}}_0 = std::get<0>(it->first); // x size of the ghost region
+ int64_t _size_{{pdf_field.name}}_1 = std::get<1>(it->first); // y size of the ghost region
+ int64_t _size_{{pdf_field.name}}_2 = std::get<2>(it->first); // z size of the ghost region
+
+ {{pdf_field.dtype.c_name}} ** _data_{{pdf_field.name}}_dp = it->second.data(); // NOTE(review): the call below passes stream='s' but no s is declared in this branch - confirm it is ignored on CPU
+ {{block_stream['kernel']|generate_call(stream='s', plain_kernel_call=True)|indent(6)}}
+ }
+ {%- endif %}
+
+ {%endif%}
+}
+
+{% for kernel in kernels %}
+void {{class_name}}::{{kernel['function_name']}}( {{kernel['kernel']|generate_plain_parameter_list(ghost_layers=True)}} )
+{ // variant that takes the number of ghost layers to include
+ {{kernel['kernel']|generate_call(ghost_layers_to_include=kernel['ghost_layers_to_include'], stream='stream')|indent(3)}}
+}
+void {{class_name}}::{{kernel['function_name']}}CellInterval( {{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci')}})
+{ // variant that takes the cell interval ci
+ {{kernel['kernel']|generate_call(stream='stream', cell_interval='ci')|indent(3)}}
+}
+{% endfor %}
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+# pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
diff --git a/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h
new file mode 100644
index 00000000..9471b6f2
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LBMSweepCollection.tmpl.h
@@ -0,0 +1,511 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+
+#include "blockforest/StructuredBlockForest.h"
+#include "blockforest/BlockID.h"
+#include "blockforest/Block.h"
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+#include "core/Macros.h"
+
+#include "field/AddToStorage.h"
+#include "field/FlagField.h"
+#include "field/iterators/FieldIterator.h"
+
+{% if target is equalto 'gpu' -%}
+#include "gpu/AddGPUFieldToStorage.h"
+#include "gpu/GPUField.h"
+#include "gpu/ParallelStreams.h"
+{%- endif %}
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+
+#include "field/SwapableCompare.h"
+#include "field/GhostLayerField.h"
+
+#include "stencil/Directions.h"
+#include "stencil/{{stencil_name}}.h"
+
+#include
+#include
+
+{% for header in headers %}
+#include {{header}}
+{% endfor %}
+
+using namespace std::placeholders;
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wunused-parameter"
+# pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+class {{class_name}}
+{
+ public:
+ enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ using sizeTuple = std::tuple;
+
+ {{class_name}}(const shared_ptr< StructuredBlockForest > & blocks, {{kernel_list|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
+ : blocks_(blocks), {{ kernel_list|generate_constructor_initializer_list(parameter_registration=parameter_scaling) }}, outerWidth_(outerWidth)
+ {
+
+ {{kernel_list|generate_constructor(parameter_registration=parameter_scaling) |indent(6)}}
+ validInnerOuterSplit_ = true;
+
+ for (auto& iBlock : *blocks)
+ {
+ if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
+ int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
+ int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+ validInnerOuterSplit_ = false;
+ }
+ }
+
+ void initialiseBlockPointer()
+ {
+ {%if block_stream_collide -%}
+ blockWise_ = true;
+
+ size_0.resize(blocks_->getNumberOfLevels());
+ {%- for field in block_stream_collide['all_fields'] %}
+ {{field.name}}Pointers.resize(blocks_->getNumberOfLevels());
+ {%if target is equalto 'gpu' -%} {{field.name}}PointersGPU.resize(blocks_->getNumberOfLevels()); {% endif %}
+ {%- endfor %}
+
+ {%if target is equalto 'gpu' -%} block_.resize(blocks_->getNumberOfLevels()); {% endif %}
+ {%if target is equalto 'gpu' -%} grid_.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+ glPropagationPDFs.resize(blocks_->getNumberOfLevels());
+ {%if target is equalto 'gpu' -%} glPropagationPDFsGPU.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+ {%if target is equalto 'gpu' -%} glPropagationBlock_.resize(blocks_->getNumberOfLevels()); {% endif %}
+ {%if target is equalto 'gpu' -%} glPropagationGrid_.resize(blocks_->getNumberOfLevels()); {% endif %}
+
+ for( auto it = blocks_->begin(); it != blocks_->end(); ++it )
+ {
+ auto* local = dynamic_cast< Block* >(it.get());
+ {%- for field in block_stream_collide['all_fields'] %}
+ auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID);
+ {%- endfor %}
+
+ size_1 = int64_c({{block_stream_collide['all_fields'][0].name}}->xSize());
+ size_2 = int64_c({{block_stream_collide['all_fields'][0].name}}->ySize());
+ size_3 = int64_c({{block_stream_collide['all_fields'][0].name}}->zSize());
+
+ {%- for field in block_stream_collide['all_fields'] %}
+
+ stride_{{field.name}}_0 = int64_c({{field.name}}->xStride());
+ stride_{{field.name}}_1 = int64_c({{field.name}}->yStride());
+ stride_{{field.name}}_2 = int64_c({{field.name}}->zStride());
+ stride_{{field.name}}_3 = int64_c(1 * int64_c({{field.name}}->fStride()));
+
+ {%- endfor %}
+ break;
+ }
+
+ for( auto it = blocks_->begin(); it != blocks_->end(); ++it )
+ {
+ auto* local = dynamic_cast< Block* >(it.get());
+ const uint_t level = local->getLevel();
+ {%- for field in block_stream_collide['all_fields'] %}
+ auto {{field.name}} = local->getData< {{field | field_type(is_gpu=is_gpu)}} >({{field.name}}ID);
+ {{field.name}}Pointers[level].emplace_back({{field.name}}->dataAt(0, 0, 0, 0));
+ {%- endfor %}
+
+
+ for(auto dir = stencil::{{stencil_name}}::beginNoCenter(); dir != stencil::{{stencil_name}}::end(); ++dir){
+ uint_t nSecIdx = blockforest::getBlockNeighborhoodSectionIndex(*dir);
+ // Propagate on ghost layers shadowing coarse or no blocks
+ if(local->neighborhoodSectionHasLargerBlock(nSecIdx)){
+ CellInterval ci;
+ {{pdf_field.name}}->getGhostRegion(*dir, ci, 1);
+ sizeTuple dirTuple = std::make_tuple(int64_c(ci.xSize()), int64_c(ci.ySize()), int64_c(ci.zSize()));
+ glPropagationPDFs[level][dirTuple].emplace_back({{pdf_field.name}}->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0));
+ }
+ }
+ }
+
+ for (uint_t level = 0; level < blocks_->getNumberOfLevels(); level++) {
+ size_0[level] = int64_c({{pdf_field.name}}Pointers[level].size());
+
+ {%if target is equalto 'gpu' -%}
+
+ int64_t indexingX = size_1 * size_0[level];
+ int64_t indexingY = size_2;
+ int64_t indexingZ = size_3;
+
+ int64_t cudaBlockSize0 = 128;
+ int64_t cudaBlockSize1 = 1;
+ int64_t cudaBlockSize2 = 1;
+
+ block_[level] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? 
indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))));
+ grid_[level] = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 )));
+
+ {%- for field in block_stream_collide['all_fields'] %}
+
+ WALBERLA_GPU_CHECK(gpuMalloc( (void**)&{{field.name}}PointersGPU[level], sizeof({{pdf_field.dtype.c_name}}* ) * {{field.name}}Pointers[level].size() ));
+ WALBERLA_GPU_CHECK(gpuMemcpy( {{field.name}}PointersGPU[level], &{{field.name}}Pointers[level][0], sizeof({{pdf_field.dtype.c_name}} *) * {{field.name}}Pointers[level].size(), gpuMemcpyHostToDevice ));
+
+ {%- endfor %}
+
+ for (auto it = glPropagationPDFs[level].begin(); it != glPropagationPDFs[level].end(); it++){
+ if(it->second.empty()){ continue;}
+
+ indexingX = std::get<0>(it->first) * int64_c(it->second.size());
+ indexingY = std::get<1>(it->first);
+ indexingZ = std::get<2>(it->first);
+
+ cudaBlockSize0 = 32;
+ cudaBlockSize1 = 1;
+ cudaBlockSize2 = 1;
+
+ glPropagationBlock_[level][it->first] = dim3((unsigned int)((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)), (unsigned int)((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))), (unsigned int)((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))));
+ glPropagationGrid_[level][it->first] = dim3((unsigned int)(( (indexingX) % (((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) == 0 ? (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) : ( (int64_t)(indexingX) / (int64_t)(((1024 < ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)) ? 1024 : ((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))) ) +1 )), (unsigned int)(( (indexingY) % (((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) == 0 ? (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) : ( (int64_t)(indexingY) / (int64_t)(((1024 < ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))) ? 1024 : ((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))) ) +1 )), (unsigned int)(( (indexingZ) % (((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? 
indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) == 0 ? (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) : ( (int64_t)(indexingZ) / (int64_t)(((64 < ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? 
indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))))))) ? 64 : ((indexingZ < cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))) ? indexingZ : cudaBlockSize2*((int64_t)(cudaBlockSize0*cudaBlockSize1) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)*((indexingY < cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0)))) ? indexingY : cudaBlockSize1*((int64_t)(cudaBlockSize0) / (int64_t)(((indexingX < cudaBlockSize0) ? indexingX : cudaBlockSize0))))))))) ) +1 )));
+
+ WALBERLA_GPU_CHECK(gpuMalloc( (void**)&glPropagationPDFsGPU[level][it->first], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size() ))
+ WALBERLA_GPU_CHECK(gpuMemcpy( glPropagationPDFsGPU[level][it->first], &it->second[0], sizeof({{pdf_field.dtype.c_name}}* ) * it->second.size(), gpuMemcpyHostToDevice ))
+ }
+
+ {% endif %}
+ }
+ {% endif %}
+
+ };
+
+ {%if block_stream_collide -%}
+ ~{{class_name}}() {
+    {%if target is equalto 'gpu' -%}
+    // Free the device-side pointer tables of every level reported by blocks_.
+    for (uint_t lvl = 0; lvl < blocks_->getNumberOfLevels(); ++lvl) {
+       {%- for field in block_stream_collide['all_fields'] %}
+       if (!{{field.name}}Pointers[lvl].empty()) {
+          WALBERLA_GPU_CHECK(gpuFree({{field.name}}PointersGPU[lvl]))
+       }
+       {%- endfor %}
+       // Ghost-layer propagation tables: entries whose host-side pointer list is empty are skipped.
+       for (auto& entry : glPropagationPDFs[lvl]) {
+          if (entry.second.empty()) { continue; }
+          WALBERLA_GPU_CHECK(gpuFree(glPropagationPDFsGPU[lvl][entry.first]))
+       }
+    }
+    {%- endif %}
+ }
+ {% else %}
+ {{ kernel_list| generate_destructor(class_name) |indent(4) }}
+ {% endif %}
+
+
+ /*************************************************************************************
+ * Internal Function Definitions with raw Pointer
+ *************************************************************************************/
+
+ void blockStreamCollide({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); // declaration only; no body is given in this header section
+ void ghostLayerPropagation({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}); // declaration only; no body is given in this header section
+ bool blockWise() const { return blockWise_; } // read-only accessor for blockWise_; const so it can be called on const objects
+
+ {%- for kernel in kernels %}
+ static void {{kernel['function_name']}} ({{kernel['kernel']|generate_plain_parameter_list(ghost_layers=0, stream="nullptr")}}); // per-kernel entry point; parameter list is generated with ghost_layers=0 and stream="nullptr"
+ static void {{kernel['function_name']}}CellInterval ({{kernel['kernel']|generate_plain_parameter_list(cell_interval='ci', stream="nullptr")}}); // same kernel; parameter list is generated with cell_interval='ci'
+ {% endfor %}
+
+ /*************************************************************************************
+ * Function Definitions for external Usage
+ *************************************************************************************/
+
+ std::function< void() > blockStreamCollideFck({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) // returns a deferred blockStreamCollide call; captures `this`, so the functor must not outlive this object
+ {
+    return [{{- ["this", "level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}}](){
+       blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
+    };
+ }
+
+ void streamCollideOverBlocks({{- ["uint_t level", "uint8_t timestep", ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} ) // thin wrapper: forwards its arguments unchanged to blockStreamCollide
+ {
+ blockStreamCollide({{- ["level", "timestep", ["stream"] if target == 'gpu' else []] | type_identifier_list -}});
+ }
+
+
+ {%- for kernel in kernels %}
+
+ std::function< void(IBlock*) > {{kernel['function_name']}}() // functor that runs the full-block sweep on the block it is given; captures `this`
+ {
+    return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+ }
+
+ std::function< void(IBlock*) > {{kernel['function_name']}}({{- ["Type type", ] | type_identifier_list -}}) // functor for the requested region; every variant captures `this`
+ {
+    if (!validInnerOuterSplit_ && type != Type::ALL)
+       WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+    // INNER and OUTER map to the dedicated sweeps; any other value (e.g. Type::ALL) yields the full-block sweep.
+    switch (type)
+    {
+    case Type::INNER:
+       return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+    case Type::OUTER:
+       return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+    default:
+       return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", ] | type_identifier_list -}}); };
+    }
+ }
+
+ std::function< void(IBlock*) > {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) // functor for the requested region; every variant captures `this`
+ {
+    if (!validInnerOuterSplit_ && type != Type::ALL)
+       WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+    // ghost_layers is only forwarded by the default (full-block) branch; the INNER and OUTER functors do not use it.
+    switch (type)
+    {
+    case Type::INNER:
+       return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", ] | type_identifier_list -}}); };
+    case Type::OUTER:
+       return [{{- ["this", ] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", ] | type_identifier_list -}}); };
+    default:
+       return [{{- ["this", "ghost_layers"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers"] | type_identifier_list -}}); };
+    }
+ }
+
+ {% if target is equalto 'gpu' -%}
+ std::function< void(IBlock*) > {{kernel['function_name']}}({{- ["Type type", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU variant: the functor also carries the stream; every variant captures `this`
+ {
+    if (!validInnerOuterSplit_ && type != Type::ALL)
+       WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+    // gpuStream is forwarded by all branches; ghost_layers only by the default (full-block) branch.
+    switch (type)
+    {
+    case Type::INNER:
+       return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+    case Type::OUTER:
+       return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+    default:
+       return [{{- ["this", "ghost_layers", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "ghost_layers", "gpuStream"] | type_identifier_list -}}); };
+    }
+ }
+
+ std::function< void(IBlock*) > {{kernel['function_name']}}({{- ["Type type", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU variant without a ghost-layer argument; every variant captures `this`
+ {
+    if (!validInnerOuterSplit_ && type != Type::ALL)
+       WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+    // The default (full-block) branch passes cell_idx_c(0) as the ghost-layer count.
+    switch (type)
+    {
+    case Type::INNER:
+       return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Inner({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+    case Type::OUTER:
+       return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}Outer({{- ["block", "gpuStream"] | type_identifier_list -}}); };
+    default:
+       return [{{- ["this", "gpuStream"] | type_identifier_list -}}](IBlock* block) { {{kernel['function_name']}}({{- ["block", "cell_idx_c(0)", "gpuStream"] | type_identifier_list -}}); };
+    }
+ }
+ {%- endif %}
+
+ void {{kernel['function_name']}}({{- ["IBlock * block",] | type_identifier_list -}}) // full-block sweep with ghost_layers fixed to 0 (GPU: stream fixed to nullptr)
+ {
+ const cell_idx_t ghost_layers = 0;
+ {% if target is equalto 'gpu' -%}
+ gpuStream_t gpuStream = nullptr;
+ {%- endif %}
+ // The statements below are emitted by code-generation filters: field extraction, kernel-parameter references, timestep advancement, the kernel call, and field swaps.
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+ {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+ {{kernel['kernel']|generate_swaps|indent(6)}}
+ }
+
+ void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers"] | type_identifier_list -}}) // full-block sweep with a caller-supplied ghost_layers value (GPU: stream fixed to nullptr)
+ {
+ {% if target is equalto 'gpu' -%}
+ gpuStream_t gpuStream = nullptr;
+ {%- endif %}
+ // The statements below are emitted by code-generation filters: field extraction, kernel-parameter references, timestep advancement, the kernel call, and field swaps.
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+ {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+ {{kernel['kernel']|generate_swaps|indent(6)}}
+ }
+
+ {% if target is equalto 'gpu' -%}
+ void {{kernel['function_name']}}({{- ["IBlock * block", "const cell_idx_t ghost_layers", "gpuStream_t gpuStream"] | type_identifier_list -}}) // GPU-only overload: same generated steps as the other full-block sweeps, with caller-supplied ghost_layers and gpuStream
+ {
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+ {{kernel['function_name']}}({{kernel['kernel']|generate_function_collection_call(ghost_layers='ghost_layers')}});
+ {{kernel['kernel']|generate_swaps|indent(6)}}
+ }
+ {%- endif %}
+
+ void {{kernel['function_name']}}CellInterval({{- ["IBlock * block", "const CellInterval & ci", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) // sweep over the cell interval `ci` of one block; includes timestep advancement and swaps
+ {
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+ {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+ {{kernel['kernel']|generate_swaps|indent(6)}}
+ }
+
+ void {{kernel['function_name']}}Inner({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}) // runs the kernel on the inner region of one block
+ {
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements(advance=False)|indent(6)}}
+ // Inner region: the field's xyz extent expanded by the negated outerWidth_ in each direction (i.e. presumably shrunk by the outer-layer width).
+ CellInterval inner = {{kernel['field']}}->xyzSize();
+ inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+ // advance=False above and no generate_swaps here - presumably the matching Outer call does the advancement and swaps; confirm.
+ {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='inner')}});
+ }
+
+ void {{kernel['function_name']}}Outer({{- ["IBlock * block", ["gpuStream_t gpuStream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}})
+ {
+ // Runs the CellInterval kernel once per entry of layers_, which is filled below with six slices (T, B, N, S, E, W).
+ {{kernel['kernel']|generate_block_data_to_field_extraction|indent(6)}}
+ {{kernel['kernel']|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True, parameter_registration=parameter_scaling)|indent(6)}}
+ {{kernel['kernel']|generate_timestep_advancements|indent(6)}}
+ // layers_ is filled only while it is empty and reused afterwards - NOTE(review): presumably assumes every block has the same cell extent; confirm.
+ if( layers_.empty() )
+ {
+ CellInterval ci;
+
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+ layers_.push_back(ci);
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+ layers_.push_back(ci);
+ // N/S slices are reduced in z by outerWidth_[2] (expand with a negative value) - presumably so they do not overlap the T/B slices.
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+ ci.expand(Cell(0, 0, -outerWidth_[2]));
+ layers_.push_back(ci);
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+ ci.expand(Cell(0, 0, -outerWidth_[2]));
+ layers_.push_back(ci);
+ // E/W slices are reduced in y and z - presumably so they do not overlap the T/B and N/S slices.
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+ ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+ layers_.push_back(ci);
+ {{kernel['field']}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+ ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+ layers_.push_back(ci);
+ }
+ // Launch the kernel once per cached slice (GPU: through a parallel section of parallelStreams_; CPU: a plain loop).
+ {%if target is equalto 'gpu'%}
+ {
+ auto parallelSection_ = parallelStreams_.parallelSection( gpuStream );
+ for( auto & ci: layers_ )
+ {
+ parallelSection_.run([&]( auto s ) { // NOTE(review): `s` is not referenced in the visible template text - check which stream the generated call uses
+ {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+ });
+ }
+ }
+ {% else %}
+ for( auto & ci: layers_ )
+ {
+ {{kernel['function_name']}}CellInterval({{kernel['kernel']|generate_function_collection_call(cell_interval='ci')}});
+ }
+ {% endif %}
+ // The generated swaps are placed after the loop over all slices.
+ {{kernel['kernel']|generate_swaps|indent(9)}}
+ }
+ {% endfor %}
+
+ {%if target is equalto 'gpu'%}
+ void setOuterPriority(int priority) // forwards `priority` to parallelStreams_, the stream set used by the Outer sweeps' parallel section
+ {
+ parallelStreams_.setStreamPriority(priority);
+ }
+ {%endif%}
+
+ private:
+ shared_ptr< StructuredBlockForest > blocks_;
+ {{kernel_list|generate_members(parameter_registration=parameter_scaling)|indent(4)}}
+ // State of the inner/outer split used by the *Inner / *Outer sweeps; layers_ caches the outer slices.
+ Cell outerWidth_;
+ std::vector< CellInterval > layers_;
+ bool validInnerOuterSplit_;
+ bool blockWise_{false};
+
+ {%if target is equalto 'gpu' -%}
+ gpu::ParallelStreams parallelStreams_;
+ {%- endif %}
+
+ {%if block_stream_collide -%}
+ // NOTE(review): size_0 below is declared without an element type in this template text; restore it from the code that fills it.
+ std::vector size_0;
+
+ int64_t size_1;
+ int64_t size_2;
+ int64_t size_3;
+
+ {%- for field in block_stream_collide['all_fields'] %}
+ int64_t stride_{{field.name}}_0;
+ int64_t stride_{{field.name}}_1;
+ int64_t stride_{{field.name}}_2;
+ int64_t stride_{{field.name}}_3;
+ {% endfor %}
+
+ {%- for field in block_stream_collide['all_fields'] %}
+ std::vector< std::vector< {{field.dtype.c_name}} * > > {{field.name}}Pointers;
+ {% endfor -%}
+ // NOTE(review): glPropagationPDFs is used as table[level] -> (key, pointer list) pairs; its template arguments are incomplete here and must be restored.
+ std::vector>> glPropagationPDFs;
+ {%if target is equalto 'gpu' -%}
+
+
+ {%- for field in block_stream_collide['all_fields'] %}
+ std::vector<{{field.dtype.c_name}} **> {{field.name}}PointersGPU;
+ {% endfor -%}
+ // Per-level launch configuration (indexed by level).
+ std::vector< dim3 > block_;
+ std::vector< dim3 > grid_;
+ // NOTE(review): the three tables below are indexed as table[level][key] with the keys of glPropagationPDFs; their template arguments are incomplete here and must be restored.
+ std::vector> glPropagationPDFsGPU;
+
+ std::vector> glPropagationBlock_;
+ std::vector> glPropagationGrid_;
+ {%- endif %}
+ {%- endif %}
+};
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+# pragma GCC diagnostic pop
+#endif
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
index 92106cdd..9558d0d2 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.cpp
@@ -19,12 +19,45 @@
#include "{{class_name}}.h"
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+#include "gpu/GPUWrapper.h"
+#include "gpu/GPUField.h"
+{%- endif %}
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wfloat-equal"
# pragma GCC diagnostic ignored "-Wshadow"
# pragma GCC diagnostic ignored "-Wconversion"
# pragma GCC diagnostic ignored "-Wunused-variable"
+# pragma GCC diagnostic ignored "-Wignored-qualifiers"
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning push
+#pragma warning( disable : 1599 )
+#endif
+
+#ifdef __CUDACC__
+#pragma push
+#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
+#pragma nv_diag_suppress 191
+#else
+#pragma diag_suppress 191
+#endif
#endif
namespace walberla {
@@ -42,6 +75,8 @@ namespace {{namespace}} {
{{ kernels['localCopyDirection'] | generate_definitions }}
{% if nonuniform -%}
+ {{ kernels['localCopyRedistribute'] | generate_definitions }}
+ {{ kernels['localPartialCoalescence'] | generate_definitions }}
{{ kernels['unpackRedistribute'] | generate_definitions }}
{{ kernels['packPartialCoalescence'] | generate_definitions }}
{{ kernels['zeroCoalescenceRegion'] | generate_definitions }}
@@ -89,7 +124,7 @@ namespace {{namespace}} {
WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
{{kernels['localCopyAll']
- | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+ | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
| indent(6) }}
}
@@ -115,6 +150,56 @@ namespace {{namespace}} {
{{kernels['unpackDirection'] | generate_call(cell_interval='ci', stream='stream') | indent(6) }}
}
+   /**
+    * Pointer-based local copy: invokes the generated 'localCopyDirection' kernel directly (plain kernel call)
+    * on the pointer arguments `_data_*_dp`. Extents are read from `_sizes` and strides from `_strides`;
+    * source and destination field use the same values. When the class is generated without block_wise,
+    * the body consists of a WALBERLA_ABORT only.
+    *
+    * NOTE(review): the launch configuration and the `_size_*` / `_stride_*` locals are only emitted for the
+    * 'gpu' target - confirm that a CPU block-wise kernel call does not reference them.
+    */
+   void {{class_name}}::PackKernels::localCopyDirection(
+      {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp",
+           kernels['localCopyDirection'].kernel_selection_parameters,
+           ["gpuStream_t stream"] if is_gpu else [], "std::array& _sizes", "std::array& _strides"]
+          | type_identifier_list -}}
+   ) const {
+      {% if block_wise -%}
+
+      {% if target is equalto 'gpu' -%}
+
+      // Index space of the launch: x spans _sizes[0] * _sizes[1], y spans _sizes[2], z spans _sizes[3].
+      const int64_t indexingX = _sizes[0] * _sizes[1];
+      const int64_t indexingY = _sizes[2];
+      const int64_t indexingZ = _sizes[3];
+
+      // Requested thread-block shape before it is fitted to the index space.
+      const int64_t cudaBlockSize0 = 128;
+      const int64_t cudaBlockSize1 = 1;
+      const int64_t cudaBlockSize2 = 1;
+
+      // Names below follow the symbol names expected by the generated kernel call.
+      const int64_t _size_0 = _sizes[0];
+
+      const int64_t _size_{{src_field.name}}_0 = _sizes[1];
+      const int64_t _size_{{src_field.name}}_1 = _sizes[2];
+      const int64_t _size_{{src_field.name}}_2 = _sizes[3];
+      const int64_t _size_{{dst_field.name}}_0 = _sizes[1];
+      const int64_t _size_{{dst_field.name}}_1 = _sizes[2];
+      const int64_t _size_{{dst_field.name}}_2 = _sizes[3];
+
+      const int64_t _stride_{{src_field.name}}_0 = _strides[0];
+      const int64_t _stride_{{src_field.name}}_1 = _strides[1];
+      const int64_t _stride_{{src_field.name}}_2 = _strides[2];
+      const int64_t _stride_{{src_field.name}}_3 = _strides[3];
+
+      const int64_t _stride_{{dst_field.name}}_0 = _strides[0];
+      const int64_t _stride_{{dst_field.name}}_1 = _strides[1];
+      const int64_t _stride_{{dst_field.name}}_2 = _strides[2];
+      const int64_t _stride_{{dst_field.name}}_3 = _strides[3];
+
+      const auto smaller = [](const int64_t a, const int64_t b) { return (a < b) ? a : b; };
+      const auto ceilDiv = [](const int64_t num, const int64_t den) { return (num % den == 0) ? num / den : num / den + 1; };
+
+      // Unclamped per-axis extents: x is filled first, the remaining share of the requested
+      // block size is handed to y and then to z.
+      const int64_t extentX = smaller(indexingX, cudaBlockSize0);
+      const int64_t extentY = smaller(indexingY, cudaBlockSize1 * (cudaBlockSize0 / extentX));
+      const int64_t extentZ = smaller(indexingZ, cudaBlockSize2 * ((cudaBlockSize0 * cudaBlockSize1) / (extentX * extentY)));
+
+      // Per-axis upper limits of 1024 / 1024 / 64 threads.
+      const int64_t threadsX = smaller(1024, extentX);
+      const int64_t threadsY = smaller(1024, extentY);
+      const int64_t threadsZ = smaller(64, extentZ);
+
+      const dim3 _block = dim3(static_cast< unsigned int >(threadsX),
+                               static_cast< unsigned int >(threadsY),
+                               static_cast< unsigned int >(threadsZ));
+      // Enough thread blocks per axis to cover the whole index space (rounded up).
+      const dim3 _grid = dim3(static_cast< unsigned int >(ceilDiv(indexingX, threadsX)),
+                              static_cast< unsigned int >(ceilDiv(indexingY, threadsY)),
+                              static_cast< unsigned int >(ceilDiv(indexingZ, threadsZ)));
+      {%- endif %}
+
+      {{kernels['localCopyDirection']
+         | generate_call(plain_kernel_call=True, stream='stream')
+         | indent(6) }}
+
+      {%else%}
+      WALBERLA_ABORT("Block wise local communication is not implemented")
+      {%- endif %}
+   }
+
void {{class_name}}::PackKernels::localCopyDirection(
{{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
"PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
@@ -123,16 +208,42 @@ namespace {{namespace}} {
| type_identifier_list -}}
) const
{
+ {% if not block_wise -%}
WALBERLA_ASSERT_EQUAL(srcInterval.xSize(), dstInterval.xSize())
WALBERLA_ASSERT_EQUAL(srcInterval.ySize(), dstInterval.ySize())
WALBERLA_ASSERT_EQUAL(srcInterval.zSize(), dstInterval.zSize())
{{kernels['localCopyDirection']
- | generate_call(cell_interval={src_field : 'srcInterval', dst_field : 'dstInterval'}, stream='stream')
+ | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
| indent(6) }}
+
+ {%else%}
+ WALBERLA_ABORT("Local communication is only implemented block wise")
+ {%- endif %}
}
+
{% if nonuniform -%}
+   /**
+    * Invokes the generated 'localCopyRedistribute' kernel with srcInterval bound to the source field and
+    * dstInterval bound to the destination field. The extents of the two intervals are not asserted
+    * to be equal here.
+    */
+   void {{class_name}}::PackKernels::localCopyRedistribute(
+      {{- [ "PdfField_T * " + src_field.name,
+            "CellInterval & srcInterval",
+            "PdfField_T * " + dst_field.name,
+            "CellInterval & dstInterval",
+            kernels['localCopyRedistribute'].kernel_selection_parameters,
+            ["gpuStream_t stream"] if is_gpu else [] ]
+          | type_identifier_list -}}
+   ) const
+   {
+      {{kernels['localCopyRedistribute']
+         | generate_call(cell_interval={src_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
+         | indent(6) }}
+   }
+
+   /**
+    * Invokes the generated 'localPartialCoalescence' kernel. The source field and the mask field are both
+    * bound to srcInterval, the destination field is bound to dstInterval. The extents of the two intervals
+    * are not asserted to be equal here.
+    */
+   void {{class_name}}::PackKernels::localPartialCoalescence(
+      {{- [ "PdfField_T * " + src_field.name,
+            "MaskField_T * " + mask_field.name,
+            "CellInterval & srcInterval",
+            "PdfField_T * " + dst_field.name,
+            "CellInterval & dstInterval",
+            kernels['localPartialCoalescence'].kernel_selection_parameters,
+            ["gpuStream_t stream"] if is_gpu else [] ]
+          | type_identifier_list -}}
+   ) const
+   {
+      {{kernels['localPartialCoalescence']
+         | generate_call(cell_interval={src_field.name : 'srcInterval', mask_field.name : 'srcInterval', dst_field.name : 'dstInterval'}, stream='stream')
+         | indent(6) }}
+   }
+
void {{class_name}}::PackKernels::unpackRedistribute(
{{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
"unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
diff --git a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
index ad40a55e..e10644b6 100644
--- a/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
+++ b/python/lbmpy_walberla/templates/LbmStorageSpecification.tmpl.h
@@ -30,21 +30,13 @@
#include "stencil/{{stencil_name}}.h"
#include "stencil/Directions.h"
-{% if target is equalto 'cpu' -%}
-#define FUNC_PREFIX
-{%- elif target is equalto 'gpu' -%}
-#define FUNC_PREFIX __global__
+{% if target is equalto 'gpu' -%}
#include "gpu/GPUWrapper.h"
#include "gpu/GPUField.h"
{%- endif %}
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
+#include <array>
+
#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
#pragma GCC diagnostic push
@@ -120,6 +112,7 @@ class {{class_name}}
{%- endif %}
static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
+ static const bool blockWise = {% if block_wise -%} true {%- else -%} false {%- endif -%};
/**
* Packs all pdfs from the given cell interval to the send buffer.
@@ -178,6 +171,13 @@ class {{class_name}}
/** Copies data between two blocks on the same process.
* PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
* */
+ // Pointer-based overload: takes pointer-to-pointer data arguments (`_data_*_dp`) together with `_sizes` and
+ // `_strides` instead of fields and cell intervals. The same `_sizes` / `_strides` values are applied to the
+ // source and the destination field. Its definition aborts unless the class was generated block wise.
+ void localCopyDirection(
+ {{- [src_field.dtype.c_name + "** _data_" + src_field.name + "_dp", dst_field.dtype.c_name + "** _data_" + dst_field.name + "_dp",
+ kernels['localCopyDirection'].kernel_selection_parameters,
+ ["gpuStream_t stream"] if is_gpu else [], "std::array& _sizes", "std::array& _strides"]
+ | type_identifier_list -}}
+ ) const;
+
void localCopyDirection(
{{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
"PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
@@ -186,6 +186,7 @@ class {{class_name}}
| type_identifier_list -}}
) const;
+
/**
* Returns the number of bytes that will be packed from / unpacked to the cell interval
* when using packDirection / unpackDirection
@@ -209,6 +210,26 @@ class {{class_name}}
{% if nonuniform -%}
+ /**
+ * Local uniform redistribute.
+ * Takes a source field with its cell interval and a destination field with its cell interval; there is no
+ * buffer argument. On GPU builds an optional stream parameter (default nullptr) is appended.
+ * NOTE(review): presumably the same-process counterpart of unpackRedistribute (coarse to fine) - confirm.
+ * */
+ void localCopyRedistribute(
+ {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
+ "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localCopyRedistribute'].kernel_selection_parameters,
+ ["gpuStream_t stream = nullptr"] if is_gpu else []]
+ | type_identifier_list -}}
+ ) const;
+
+ /**
+ * Local partial coalescence.
+ * Takes a source field, a mask field and the source cell interval, plus a destination field with its cell
+ * interval; there is no buffer argument. On GPU builds an optional stream parameter (default nullptr) is appended.
+ * NOTE(review): presumably the same-process counterpart of packPartialCoalescence (fine to coarse) - confirm.
+ * */
+ void localPartialCoalescence(
+ {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & srcInterval",
+ "PdfField_T * " + dst_field.name, "CellInterval & dstInterval", kernels['localPartialCoalescence'].kernel_selection_parameters,
+ ["gpuStream_t stream = nullptr"] if is_gpu else []]
+ | type_identifier_list -}}
+ ) const;
+
/**
* Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
* */
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
index ea583181..7465a45b 100644
--- a/python/lbmpy_walberla/walberla_lbm_package.py
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -3,7 +3,6 @@
from pystencils import Target, Field
from lbmpy.creationfunctions import LbmCollisionRule, LBMConfig, LBMOptimisation
-from lbmpy.relaxationrates import get_shear_relaxation_rate
from pystencils_walberla.cmake_integration import CodeGenerationContext
@@ -35,7 +34,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
cpu_openmp=cpu_openmp)
if nonuniform:
- omega = get_shear_relaxation_rate(method)
+ omega = lbm_config.relaxation_rate
refinement_scaling = RefinementScaling()
refinement_scaling.add_standard_relaxation_rate_scaling(omega)
else:
@@ -47,15 +46,15 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
macroscopic_fields=macroscopic_fields,
target=target, data_type=data_type,
cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info,
- max_threads=max_threads,
- set_pre_collision_pdfs=set_pre_collision_pdfs,
+ max_threads=max_threads, set_pre_collision_pdfs=set_pre_collision_pdfs,
**kernel_parameters)
spatial_shape = None
if lbm_optimisation.symbolic_field and lbm_optimisation.symbolic_field.has_fixed_shape:
spatial_shape = lbm_optimisation.symbolic_field.spatial_shape + (lbm_config.stencil.Q, )
- generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
- lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
- streaming_pattern=lbm_config.streaming_pattern,
- target=target, layout=lbm_optimisation.field_layout)
+ if boundaries is not None:
+ generate_boundary_collection(ctx, f'{name}BoundaryCollection', boundary_generators=boundaries,
+ lb_method=method, field_name='pdfs', spatial_shape=spatial_shape,
+ streaming_pattern=lbm_config.streaming_pattern,
+ target=target, layout=lbm_optimisation.field_layout)
diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py
index f78f7fcf..158a2945 100644
--- a/python/pystencils_walberla/__init__.py
+++ b/python/pystencils_walberla/__init__.py
@@ -1,4 +1,4 @@
-from .boundary import generate_staggered_boundary, generate_staggered_flux_boundary
+from .boundary import generate_boundary, generate_staggered_boundary, generate_staggered_flux_boundary
from .cmake_integration import CodeGeneration, ManualCodeGenerationContext
from .function_generator import function_generator
@@ -8,7 +8,7 @@
generate_pack_info_from_kernel, generate_mpidtype_info_from_kernel)
from .utility import generate_info_header, get_vectorize_instruction_set, config_from_context
-__all__ = ['generate_staggered_boundary', 'generate_staggered_flux_boundary',
+__all__ = ['generate_boundary', 'generate_staggered_boundary', 'generate_staggered_flux_boundary',
'CodeGeneration', 'ManualCodeGenerationContext',
'function_generator',
'generate_sweep', 'generate_selective_sweep', 'generate_sweep_collection',
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index 7af79ed6..6e4ff76b 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -67,8 +67,16 @@ def generate_boundary(generation_context,
if not kernel_creation_function:
kernel_creation_function = create_boundary_kernel
- kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
- target=target, **create_kernel_params)
+ bc_force = hasattr(boundary_object, "calculate_force_on_boundary") and boundary_object.calculate_force_on_boundary
+ if bc_force:
+ force_vector_type = np.dtype([(f"F_{i}", np.float64) for i in range(dim)], align=True)
+ force_vector = Field('forceVector', FieldType.INDEXED, force_vector_type, layout=[0],
+ shape=(TypedSymbol("forceVectorSize", create_type("int32")), 1), strides=(1, 1))
+ kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
+ target=target, force_vector=force_vector, **create_kernel_params)
+ else:
+ kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object,
+ target=target, **create_kernel_params)
if isinstance(kernel, KernelFunction):
kernel.function_name = f"boundary_{boundary_object.name}"
@@ -103,7 +111,8 @@ def generate_boundary(generation_context,
'additional_data_handler': additional_data_handler,
'dtype': "double" if is_float else "float",
'layout': layout,
- 'index_shape': index_shape
+ 'index_shape': index_shape,
+ 'calculate_force': bc_force
}
env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py
index 6d05bf8f..d83ed6bb 100644
--- a/python/pystencils_walberla/jinja_filters.py
+++ b/python/pystencils_walberla/jinja_filters.py
@@ -9,27 +9,24 @@
from pystencils import Target, Backend
from pystencils.backends.cbackend import generate_c
-from pystencils.typing import TypedSymbol, get_base_type
+from pystencils.typing import TypedSymbol, get_base_type, PointerType
from pystencils.field import FieldType
from pystencils.sympyextensions import prod
temporary_fieldPointerTemplate = """{type}"""
-temporary_fieldMemberTemplate = """
-private: std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name}_;"""
+temporary_fieldMemberTemplate = "std::unordered_map<IBlock*, {type} *> cache_{original_field_name}_;"
temporary_fieldTemplate = """
{{
- // Getting temporary field {tmp_field_name}
- auto it = cache_{original_field_name}_.find( {original_field_name} );
- if( it != cache_{original_field_name}_.end() )
+ if (cache_{original_field_name}_.find(block) == cache_{original_field_name}_.end())
{{
- {tmp_field_name} = *it;
+ {tmp_field_name} = {original_field_name}->cloneUninitialized();
+ cache_{original_field_name}_[block] = {tmp_field_name};
}}
else
{{
- {tmp_field_name} = {original_field_name}->cloneUninitialized();
- cache_{original_field_name}_.insert({tmp_field_name});
+ {tmp_field_name} = cache_{original_field_name}_[block];
}}
}}
"""
@@ -40,7 +37,7 @@
delete_loop = """
for(auto p: cache_{original_field_name}_) {{
- delete p;
+ delete p.second;
}}
"""
@@ -226,17 +223,23 @@ def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_igno
def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore=(), ignore_fields=False,
- parameter_registration=None):
+ parameter_registration=None, level_known=False):
+
+ pointer_symbols = {p.symbol.name for p in kernel_info.parameters
+ if not p.is_field_parameter and isinstance(p.symbol.dtype, PointerType)}
symbols = {p.field_name for p in kernel_info.parameters if p.is_field_pointer and not ignore_fields}
symbols.update(p.symbol.name for p in kernel_info.parameters if not p.is_field_parameter)
symbols.difference_update(parameters_to_ignore)
+ if ignore_fields:
+ symbols.difference_update(pointer_symbols)
type_information = {p.symbol.name: p.symbol.dtype for p in kernel_info.parameters if not p.is_field_parameter}
result = []
registered_parameters = [] if not parameter_registration else parameter_registration.scaling_info
for s in symbols:
if s in registered_parameters:
dtype = type_information[s].c_name
- result.append("const uint_t level = block->getBlockStorage().getLevel(*block);")
+ if not level_known:
+ result.append("const uint_t level = block->getBlockStorage().getLevel(*block);")
result.append(f"{dtype} & {s} = {s}Vector[level];")
else:
result.append(f"auto & {s} = {prefix}{s}_;")
@@ -245,7 +248,7 @@ def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignor
@jinja2_context_decorator
def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, stream='0',
- spatial_shape_symbols=()):
+ spatial_shape_symbols=(), plain_kernel_call=False):
"""Generates the function call to a pystencils kernel
Args:
@@ -265,6 +268,20 @@ def generate_call(ctx, kernel, ghost_layers_to_include=0, cell_interval=None, st
parameters - however in special cases like boundary conditions a manual specification
may be necessary.
"""
+ ast_params = kernel.parameters
+ if len(spatial_shape_symbols) == 0:
+ for param in ast_params:
+ if param.is_field_parameter and FieldType.is_indexed(param.fields[0]):
+ continue
+ if param.is_field_pointer:
+ field = param.fields[0]
+ if field.has_fixed_shape:
+ spatial_shape_symbols = field.spatial_shape
+
+ if plain_kernel_call:
+ return kernel.generate_kernel_invocation_code(plain_kernel_call=True, stream=stream,
+ spatial_shape_symbols=spatial_shape_symbols)
+
assert isinstance(ghost_layers_to_include, str) or ghost_layers_to_include >= 0
ast_params = kernel.parameters
vec_info = ctx.get('cpu_vectorize_info', None)
@@ -296,7 +313,7 @@ def get_cell_interval(field_object):
if isinstance(cell_interval, str):
return cell_interval
elif isinstance(cell_interval, dict):
- return cell_interval[field_object]
+ return cell_interval[field_object.name]
else:
return None
@@ -591,7 +608,8 @@ def generate_members(ctx, kernel_infos, parameters_to_ignore=None, only_fields=F
original_field_name = field_name[:-len('_tmp')]
f_size = get_field_fsize(f)
field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu)
- result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name))
+ result.append(temporary_fieldMemberTemplate.format(type=field_type,
+ original_field_name=original_field_name))
for kernel_info in kernel_infos:
if hasattr(kernel_info, 'varying_parameters'):
@@ -734,6 +752,16 @@ def generate_constructor(ctx, kernel_infos, parameter_registration):
return "\n".join(result)
+@jinja2_context_decorator
+def generate_field_strides(ctx, kernel_info):
+    """Generates C++ definitions for the field-stride symbols of a kernel.
+
+    For every parameter of ``kernel_info`` that is a field stride, one line of the form
+    ``const <c type> <symbol name> = <symbol name without its first character>;`` is produced.
+    The lines are joined with newlines; an empty string results if there is no stride parameter.
+    ``ctx`` is not used.
+    """
+    stride_symbols = [p.symbol for p in kernel_info.parameters if p.is_field_stride]
+    return "\n".join(f"const {sym.dtype.c_name} {sym.name} = {sym.name[1:]};" for sym in stride_symbols)
+
+
def generate_list_of_expressions(expressions, prepend=''):
if len(expressions) == 0:
return ''
@@ -806,3 +834,4 @@ def add_pystencils_filters_to_jinja_env(jinja_env):
jinja_env.filters['identifier_list'] = identifier_list
jinja_env.filters['list_of_expressions'] = generate_list_of_expressions
jinja_env.filters['field_type'] = field_type
+ jinja_env.filters['generate_field_strides'] = generate_field_strides
diff --git a/python/pystencils_walberla/kernel_info.py b/python/pystencils_walberla/kernel_info.py
index 586c05ab..698b0511 100644
--- a/python/pystencils_walberla/kernel_info.py
+++ b/python/pystencils_walberla/kernel_info.py
@@ -32,13 +32,20 @@ def get_headers(self):
all_headers = [list(get_headers(self.ast))]
return reduce(merge_sorted_lists, all_headers)
- def generate_kernel_invocation_code(self, **kwargs):
+ def generate_kernel_invocation_code(self, plain_kernel_call=False, **kwargs):
ast = self.ast
ast_params = self.parameters
fnc_name = ast.function_name
is_cpu = self.ast.target == Target.CPU
call_parameters = ", ".join([p.symbol.name for p in ast_params])
+ if plain_kernel_call:
+ if is_cpu:
+ return f"internal_{fnc_name}::{fnc_name}({call_parameters});"
+ else:
+ stream = kwargs.get('stream', '0')
+ return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});"
+
if not is_cpu:
stream = kwargs.get('stream', '0')
spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ())
diff --git a/python/pystencils_walberla/kernel_selection.py b/python/pystencils_walberla/kernel_selection.py
index ad8a9986..3195624f 100644
--- a/python/pystencils_walberla/kernel_selection.py
+++ b/python/pystencils_walberla/kernel_selection.py
@@ -157,6 +157,21 @@ def case_code(case, subtree):
return switch_code
+class AbortNode(AbstractKernelSelectionNode):
+    """Selection-tree leaf that calls no kernel and emits a ``WALBERLA_ABORT`` statement instead.
+
+    Args:
+        message: text placed inside the string literal passed to ``WALBERLA_ABORT``.
+    """
+
+    def __init__(self, message):
+        self.message = message
+
+    @property
+    def selection_parameters(self):
+        """An abort node introduces no selection parameters."""
+        return set()
+
+    def collect_kernel_calls(self):
+        """An abort node contributes no kernel calls."""
+        return set()
+
+    def get_code(self, **kwargs):
+        """Returns the C++ abort statement; keyword arguments are accepted but ignored."""
+        # The message ends up inside a C++ string literal, so characters that would terminate or
+        # corrupt the literal (backslash, double quote, newline) have to be escaped.
+        escaped = str(self.message).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+        return f'WALBERLA_ABORT("{escaped}")'
+
+
class KernelCallNode(AbstractKernelSelectionNode):
def __init__(self, ast):
self.ast = ast
@@ -169,13 +184,20 @@ def selection_parameters(self) -> Set[TypedSymbol]:
def collect_kernel_calls(self):
return {self}
- def get_code(self, **kwargs):
+ def get_code(self, plain_kernel_call=False, **kwargs):
ast = self.ast
ast_params = self.parameters
fnc_name = ast.function_name
is_cpu = self.ast.target == Target.CPU
call_parameters = ", ".join([p.symbol.name for p in ast_params])
+ if plain_kernel_call:
+ if is_cpu:
+ return f"internal_{fnc_name}::{fnc_name}({call_parameters});"
+ else:
+ stream = kwargs.get('stream', '0')
+ return f"internal_{fnc_name}::{fnc_name}<<<_grid, _block, 0, {stream}>>>({call_parameters});"
+
if not is_cpu:
stream = kwargs.get('stream', '0')
spatial_shape_symbols = kwargs.get('spatial_shape_symbols', ())
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
index 644202ba..3ece9a91 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp
@@ -82,10 +82,23 @@ void {{class_name}}::run_impl(
uint8_t * _data_indexVector = reinterpret_cast(pointer);
- {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
+ {% if calculate_force -%}
+ auto * forceVector = block->getData(forceVectorID);
+ WALBERLA_ASSERT_EQUAL(indexVectorSize, int32_c( forceVector->forceVector().size() ))
+
+ {% if target == 'gpu' -%}
+ auto forcePointer = forceVector->pointerGpu();
+ int32_t forceVectorSize = int32_c( forceVector->forceVector().size() );
+ {% else %}
+ auto forcePointer = forceVector->pointerCpu();
+ {% endif %}
+ uint8_t * _data_forceVector = reinterpret_cast(forcePointer);
+ {%- endif %}
+
+ {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])|indent(4)}}
{{kernel|generate_timestep_advancements|indent(4)}}
- {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }}
- {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
+ {{kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize', 'forceVectorSize'], ignore_fields=True)|indent(4) }}
+ {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize', 'forceVectorSize'], stream='stream')|indent(4)}}
}
void {{class_name}}::run(
diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h
index 704a7227..43fd409e 100644
--- a/python/pystencils_walberla/templates/Boundary.tmpl.h
+++ b/python/pystencils_walberla/templates/Boundary.tmpl.h
@@ -110,6 +110,9 @@ class {{class_name}}
{%- endif %}
}
+
+
+
private:
std::vector cpuVectors_{NUM_TYPES};
@@ -119,12 +122,88 @@ class {{class_name}}
{%- endif %}
};
+ {% if calculate_force -%}
+
+ struct ForceStruct { // one force entry made of three double components F_0, F_1, F_2
+ double F_0;
+ double F_1;
+ double F_2;
+ ForceStruct() : F_0(double_c(0.0)), F_1(double_c(0.0)), F_2(double_c(0.0)) {} // every component starts at zero
+ bool operator==(const ForceStruct & o) const { // equal when floatIsEqual holds for each pair of components
+ return floatIsEqual(F_0, o.F_0) && floatIsEqual(F_1, o.F_1) && floatIsEqual(F_2, o.F_2);
+ }
+ };
+
+ class ForceVector // host-side list of ForceStruct entries plus, for GPU targets, one device buffer mirroring it
+ {
+ public:
+    ForceVector() = default;
+    bool operator==(ForceVector const &other) const { return other.cpuVector_ == cpuVector_; }
+
+    {% if target == 'gpu' -%}
+    ~ForceVector() {if(!gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] ))}} // releases the device buffer, if one exists
+    {% endif -%}
+    // Host-side access. After resizing through forceVector(), call syncGPU() to rebuild the device buffer.
+    std::vector & forceVector() { return cpuVector_; }
+    ForceStruct * pointerCpu() { return cpuVector_.data(); }
+    bool empty() const {return cpuVector_.empty();}
+
+    {% if target == 'gpu' -%}
+    ForceStruct * pointerGpu() { return gpuVector_.empty() ? nullptr : gpuVector_[0]; } // nullptr while no device buffer exists
+    {% endif -%}
+    // Brings the host copy up to date via syncCPU() and returns the component-wise sum over all entries.
+    Vector3 getForce()
+    {
+       syncCPU();
+       Vector3 result(double_c(0.0));
+       for(const auto & entry : cpuVector_)
+       {
+          result[0] += entry.F_0;
+          result[1] += entry.F_1;
+          result[2] += entry.F_2;
+       }
+       return result;
+    }
+    // Rebuilds the device buffer from the host vector; an empty host vector leaves no device buffer behind.
+    void syncGPU()
+    {
+       {% if target == 'gpu' -%}
+       if(!gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuFree( gpuVector_[0] )) gpuVector_.clear();} // clear() forgets the freed pointer so it can never be freed twice
+       if(!cpuVector_.empty())
+       {
+          gpuVector_.resize(1); // only slot 0 is ever read, so a single pointer entry suffices
+          WALBERLA_GPU_CHECK(gpuMalloc(&gpuVector_[0], sizeof(ForceStruct) * cpuVector_.size()))
+          WALBERLA_GPU_CHECK(gpuMemcpy(gpuVector_[0], cpuVector_.data(), sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyHostToDevice))
+       }
+       {%- endif %}
+    }
+    // Copies the device buffer into the host vector; does nothing when either side is empty.
+    void syncCPU()
+    {
+       {% if target == 'gpu' -%}
+       if(!cpuVector_.empty() && !gpuVector_.empty()){WALBERLA_GPU_CHECK(gpuMemcpy( cpuVector_.data(), gpuVector_[0], sizeof(ForceStruct) * cpuVector_.size(), gpuMemcpyDeviceToHost ))}
+       {%- endif %}
+    }
+
+ private:
+    std::vector cpuVector_;
+    {% if target == 'gpu' -%}
+    std::vector gpuVector_;
+    {%- endif %}
+ };
+
+ {%- endif %}
+
{{class_name}}( const shared_ptr & blocks,
- {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
- :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
+ {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'])}}{{additional_data_handler.constructor_arguments}})
+ :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize']) }}
{
auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
+ {% if calculate_force -%}
+ auto createForceVector = []( IBlock * const , StructuredBlockStorage * const ) { return new ForceVector(); };
+ forceVectorID = blocks->addStructuredBlockData< ForceVector >( createForceVector, "forceVector_{{class_name}}");
+ {%- endif %}
};
void run (
@@ -148,6 +227,19 @@ class {{class_name}}
{{- ["IBlock * block", kernel.kernel_selection_parameters, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}}
);
+ Vector3 getForce(IBlock * {% if calculate_force -%}block{%else%}/*block*/{%- endif %})
+ { // Returns the summed force stored for the given block; only usable when the class was generated with calculate_force.
+ {% if calculate_force -%}
+ auto * forceVector = block->getData(forceVectorID);
+ if(forceVector->empty()) // no force entries on this block: return zero without calling ForceVector::getForce()
+ return Vector3(double_c(0.0));
+ return forceVector->getForce();
+ {% else %}
+ WALBERLA_ABORT("Boundary condition was not generated including force calculation.") // NOTE(review): the return below presumably only exists to satisfy the compiler - confirm WALBERLA_ABORT never returns
+ return Vector3(double_c(0.0));
+ {%- endif %}
+ }
+
std::function getSweep( {{- [interface_spec.high_level_args, ["gpuStream_t stream = nullptr"] if target == 'gpu' else []] | type_identifier_list -}} )
{
return [ {{- ["this", interface_spec.high_level_args, ["stream"] if target == 'gpu' else []] | identifier_list -}} ]
@@ -186,6 +278,9 @@ class {{class_name}}
auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+ {% if calculate_force -%}
+ auto * forceVector = block->getData< ForceVector > ( forceVectorID );
+ {%- endif %}
auto * flagField = block->getData< FlagField_T > ( flagFieldID );
{{additional_data_handler.additional_field_data|indent(4)}}
@@ -300,6 +395,10 @@ class {{class_name}}
{% endif %}
indexVectors->syncGPU();
+ {% if calculate_force -%}
+ forceVector->forceVector().resize(indexVectorAll.size());
+ forceVector->syncGPU();
+ {%- endif %}
}
private:
@@ -310,9 +409,12 @@ class {{class_name}}
);
BlockDataID indexVectorID;
+ {% if calculate_force -%}
+ BlockDataID forceVectorID;
+ {%- endif %}
{{additional_data_handler.additional_member_variable|indent(4)}}
public:
- {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
+ {{kernel|generate_members(('indexVector', 'indexVectorSize', 'forceVector', 'forceVectorSize'))|indent(4)}}
};
diff --git a/src/blockforest/AABBRefinementSelection.h b/src/blockforest/AABBRefinementSelection.h
index 45847dca..f92b062d 100644
--- a/src/blockforest/AABBRefinementSelection.h
+++ b/src/blockforest/AABBRefinementSelection.h
@@ -46,7 +46,7 @@ class AABBRefinementSelection
{
if( configBlock )
{
- auto refinementBlock = configBlock.getBlock( "AABBRefinementSelection" );
+ auto refinementBlock = configBlock.getKey() == "AABBRefinementSelection" ? configBlock : configBlock.getBlock( "AABBRefinementSelection" );
if( refinementBlock )
{
diff --git a/src/blockforest/communication/UniformBufferedScheme.h b/src/blockforest/communication/UniformBufferedScheme.h
index 1693d0d6..e7cbd6c6 100644
--- a/src/blockforest/communication/UniformBufferedScheme.h
+++ b/src/blockforest/communication/UniformBufferedScheme.h
@@ -158,9 +158,9 @@ class UniformBufferedScheme
void startCommunication();
void wait();
+ std::function getCommunicateFunctor();
std::function getStartCommunicateFunctor();
std::function getWaitFunctor();
- std::function getCommunicateFunctor();
//@}
//*******************************************************************************************************************
@@ -535,21 +535,21 @@ void UniformBufferedScheme::localBufferUnpacking( const uint_t index, c
}
template< typename Stencil >
-std::function UniformBufferedScheme::getStartCommunicateFunctor()
+std::function UniformBufferedScheme::getCommunicateFunctor()
{
- return std::bind( &UniformBufferedScheme::startCommunication, this );
+ return std::bind( &UniformBufferedScheme::communicate, this );
}
template< typename Stencil >
-std::function UniformBufferedScheme::getWaitFunctor()
+std::function UniformBufferedScheme::getStartCommunicateFunctor()
{
- return std::bind( &UniformBufferedScheme::wait, this );
+ return std::bind( &UniformBufferedScheme::startCommunication, this );
}
template< typename Stencil >
-std::function UniformBufferedScheme::getCommunicateFunctor()
+std::function UniformBufferedScheme::getWaitFunctor()
{
- return std::bind( &UniformBufferedScheme::communicate, this );
+ return std::bind( &UniformBufferedScheme::wait, this );
}
} // namespace communication
diff --git a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
index a5dc8d8c..de398e65 100644
--- a/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
+++ b/src/gpu/communication/GeneratedNonUniformGPUPackInfo.h
@@ -46,6 +46,8 @@ class GeneratedNonUniformGPUPackInfo
inline void packDataEqualLevel( const Block * sender, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const;
virtual void unpackDataEqualLevel( Block * receiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream) = 0;
+ virtual void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) = 0; // NonUniformGPUScheme::setupCommunication calls this for every equal-level neighbor that exists on the same process; presumably records the pair for the level-wide overload below - TODO confirm
+ virtual void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) = 0; // level-wide variant: NonUniformGPUScheme::startCommunicationEqualLevel calls it once per participating level, passing the timestep stored for that level, before the send buffers are filled
virtual void communicateLocalEqualLevel( const Block * sender, Block * receiver, stencil::Direction dir, gpuStream_t stream) = 0;
inline void packDataCoarseToFine ( const Block * coarseSender, const BlockID & fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream = nullptr) const;
@@ -63,6 +65,7 @@ class GeneratedNonUniformGPUPackInfo
virtual uint_t sizeCoarseToFineReceive ( Block* fineReceiver, stencil::Direction dir) = 0;
virtual uint_t sizeFineToCoarseSend ( const Block * fineSender, stencil::Direction dir) = 0;
+ virtual void sync() = 0; // called on every pack info from NonUniformGPUScheme::setupCommunication; presumably finalises the data gathered via addForLocalEqualLevelComm - TODO confirm
#ifndef NDEBUG
void clearBufferSizeCheckMap() { bufferSize_.clear(); }
diff --git a/src/gpu/communication/NonUniformGPUScheme.h b/src/gpu/communication/NonUniformGPUScheme.h
index b872be1d..ddd9fafe 100644
--- a/src/gpu/communication/NonUniformGPUScheme.h
+++ b/src/gpu/communication/NonUniformGPUScheme.h
@@ -90,6 +90,8 @@ class NonUniformGPUScheme
inline void waitCommunicateCoarseToFine(uint_t fineLevel);
inline void waitCommunicateFineToCoarse(uint_t fineLevel);
+ // Stores the timestep value that is passed to the pack infos' communicateLocalEqualLevel for `level`; aborts if `level` lies outside the per-level storage sized in refresh().
+ inline void setTimestepForLevel(uint_t level, uint8_t timestep) { WALBERLA_CHECK_LESS(level, timestepPerLevel_.size(), "setTimestepForLevel: invalid refinement level " << level) timestepPerLevel_[level] = timestep; }
private:
void setupCommunication();
@@ -128,6 +130,7 @@ class NonUniformGPUScheme
Set< SUID > incompatibleBlockSelectors_;
gpuStream_t streams_[Stencil::Q];
+ std::vector< uint8_t > timestepPerLevel_;
};
template< typename Stencil >
@@ -196,6 +199,7 @@ void NonUniformGPUScheme< Stencil >::refresh()
WALBERLA_CHECK_NOT_NULLPTR(forest,
"Trying to access communication for a block storage object that doesn't exist anymore")
const uint_t levels = forest->getNumberOfLevels();
+ timestepPerLevel_.resize(levels);
for (uint_t i = 0; i != 3; ++i)
{
@@ -313,6 +317,13 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i
for (auto it : headers_[EQUAL_LEVEL][index])
bufferSystemGPU_[EQUAL_LEVEL][index].sendBuffer(it.first).clear();
+ // If localCommunication is generated blockwise it is executed here.
+ for (auto level : participatingLevels){
+ for (auto& pi : packInfos_){
+ pi->communicateLocalEqualLevel(level, timestepPerLevel_[level], streams_[0]);
+ }
+ }
+
// Start filling send buffers
for (auto& iBlock : *forest)
{
@@ -369,13 +380,12 @@ void NonUniformGPUScheme< Stencil >::startCommunicationEqualLevel(const uint_t i
}
}
}
+
// wait for packing to finish
- for (uint_t i = 0; i < Stencil::Q; ++i)
- {
+ for (uint_t i = 0; i < Stencil::Q; ++i){
WALBERLA_GPU_CHECK(gpuStreamSynchronize(streams_[i]))
}
-
if (sendFromGPU_)
bufferSystemGPU_[EQUAL_LEVEL][index].sendAll();
else
@@ -836,8 +846,14 @@ void NonUniformGPUScheme< Stencil >::setupCommunication()
if (!selectable::isSetSelected(block->getNeighborState(neighborIdx, uint_t(0)), requiredBlockSelectors_,
incompatibleBlockSelectors_))
continue;
- if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) )
+
+ if( block->neighborExistsLocally( neighborIdx, uint_t(0) ) ){
+ auto receiverBlock = dynamic_cast< Block * >( forest->getBlock( block->getNeighborId( neighborIdx, uint_t(0) )) );
+ for (auto& pi : packInfos_){
+ pi->addForLocalEqualLevelComm(block, receiverBlock, *dir);
+ }
continue;
+ }
const BlockID& receiverId = block->getNeighborId(neighborIdx, uint_t(0));
auto nProcess = mpi::MPIRank(block->getNeighborProcess(neighborIdx, uint_t(0)));
@@ -915,6 +931,10 @@ void NonUniformGPUScheme< Stencil >::setupCommunication()
}
}
+ for (auto& pi : packInfos_){
+ pi->sync();
+ }
+
for (uint_t i = 0; i != 3; ++i)
{
for (uint_t j = 0; j <= levels; ++j)
diff --git a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
index eb1a23fb..cca61653 100644
--- a/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
+++ b/src/lbm_generated/boundary/D3Q19BoundaryCollection.h
@@ -22,6 +22,8 @@
#include "core/DataTypes.h"
#include "domain_decomposition/IBlock.h"
+
+
#include "OutflowD3Q19.h"
#include "FixedDensityD3Q19.h"
#include "FreeSlipD3Q19.h"
@@ -43,11 +45,11 @@ class D3Q19BoundaryCollection
D3Q19BoundaryCollection(const shared_ptr & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
: blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
{
- OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID);
- FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID, density);
- FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID);
- NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID);
- UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID, u_x, u_y, u_z);
+ OutflowD3Q19Object = std::make_shared< lbm::OutflowD3Q19 >(blocks, pdfsID_);
+ FixedDensityD3Q19Object = std::make_shared< lbm::FixedDensityD3Q19 >(blocks, pdfsID_, density);
+ FreeSlipD3Q19Object = std::make_shared< lbm::FreeSlipD3Q19 >(blocks, pdfsID_);
+ NoSlipD3Q19Object = std::make_shared< lbm::NoSlipD3Q19 >(blocks, pdfsID_);
+ UBBD3Q19Object = std::make_shared< lbm::UBBD3Q19 >(blocks, pdfsID_, u_x, u_y, u_z);
OutflowD3Q19Object->fillFromFlagField(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
@@ -120,4 +122,4 @@ class D3Q19BoundaryCollection
};
}
-}
+}
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
index 3428689b..cb7a039d 100644
--- a/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
+++ b/src/lbm_generated/boundary/D3Q27BoundaryCollection.h
@@ -22,6 +22,8 @@
#include "core/DataTypes.h"
#include "domain_decomposition/IBlock.h"
+
+
#include "OutflowD3Q27.h"
#include "FixedDensityD3Q27.h"
#include "FreeSlipD3Q27.h"
@@ -43,11 +45,11 @@ class D3Q27BoundaryCollection
D3Q27BoundaryCollection(const shared_ptr & blocks, BlockDataID flagID_, BlockDataID pdfsID_, FlagUID domainUID_, double density, double u_x, double u_y, double u_z)
: blocks_(blocks), flagID(flagID_), pdfsID(pdfsID_), domainUID(domainUID_)
{
- OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID);
- FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID, density);
- FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID);
- NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID);
- UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID, u_x, u_y, u_z);
+ OutflowD3Q27Object = std::make_shared< lbm::OutflowD3Q27 >(blocks, pdfsID_);
+ FixedDensityD3Q27Object = std::make_shared< lbm::FixedDensityD3Q27 >(blocks, pdfsID_, density);
+ FreeSlipD3Q27Object = std::make_shared< lbm::FreeSlipD3Q27 >(blocks, pdfsID_);
+ NoSlipD3Q27Object = std::make_shared< lbm::NoSlipD3Q27 >(blocks, pdfsID_);
+ UBBD3Q27Object = std::make_shared< lbm::UBBD3Q27 >(blocks, pdfsID_, u_x, u_y, u_z);
OutflowD3Q27Object->fillFromFlagField(blocks, flagID, walberla::FlagUID("Outflow"), domainUID);
@@ -120,4 +122,4 @@ class D3Q27BoundaryCollection
};
}
-}
+}
\ No newline at end of file
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
index e449704f..9bfdd22e 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_fixeddensityd3q19_even {
-static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
+static FUNC_PREFIX void fixeddensityd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
@@ -59,29 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q19_even(const uint8_t * RESTRICT const _d
const double delta_rho = rho - 1.0;
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
- const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
- const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
- const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3];
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+ const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+ const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3];
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
- const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0))))))))))))))));
+ const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.33333333333333331): ((((dir) == (1)) || ((dir) == (2))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)): (0.0)))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q19.h b/src/lbm_generated/boundary/FixedDensityD3Q19.h
index b4575d18..2b6fa163 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q19.h
+++ b/src/lbm_generated/boundary/FixedDensityD3Q19.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
index 3ff43bc5..f8ccd833 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_fixeddensityd3q27_even {
-static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
+static FUNC_PREFIX void fixeddensityd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, double density, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
@@ -59,28 +59,29 @@ static FUNC_PREFIX void fixeddensityd3q27_even(const uint8_t * RESTRICT const _d
const double delta_rho = rho - 1.0;
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
const double vel0Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
const double vel1Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3];
const double vel2Term = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 5*_stride_pdfs_3];
- const double u_0 = vel0Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
- const double u_1 = vel1Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3];
- const double u_2 = vel2Term - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - 1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3];
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+ const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 9*_stride_pdfs_3];
+ const double u_2 = vel2Term + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + 6*_stride_pdfs_3];
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -1.0*_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir] + 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0)))))))))))))))))))))));
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = 2.0*((((dir) == (0))) ? (f_eq_common*0.29629629629629628): ((((dir) == (1)) || ((dir) == (2))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_1*u_1)): ((((dir) == (3)) || ((dir) == (4))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_0*u_0)): ((((dir) == (5)) || ((dir) == (6))) ? (f_eq_common*0.07407407407407407 + 0.33333333333333331*(u_2*u_2)): ((((dir) == (7))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (8)) || ((dir) == (9))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu1*u0Pu1)): ((((dir) == (10))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu1*u0Mu1)): ((((dir) == (11))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (12))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (13))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (14))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (15))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Mu2*u1Mu2)): ((((dir) == (16))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u1Pu2*u1Pu2)): ((((dir) == (17))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Pu2*u0Pu2)): ((((dir) == (18))) ? (f_eq_common*0.018518518518518517 + 0.083333333333333329*(u0Mu2*u0Mu2)): ((((dir) == (19))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (20))) ? 
(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (21))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (22)) || ((dir) == (23))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (24))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)): ((((dir) == (25))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): ((((dir) == (26))) ? (delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)): (0.0))))))))))))))))))))))) - _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/FixedDensityD3Q27.h b/src/lbm_generated/boundary/FixedDensityD3Q27.h
index 359540d2..8efeb95c 100644
--- a/src/lbm_generated/boundary/FixedDensityD3Q27.h
+++ b/src/lbm_generated/boundary/FixedDensityD3Q27.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
index 2e3dc465..3bee7fa2 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_freeslipd3q19_even {
-static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void freeslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
@@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q19_even(const uint8_t * RESTRICT const _data_
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+ const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q19.h b/src/lbm_generated/boundary/FreeSlipD3Q19.h
index 4679ffc4..1c436dd3 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q19.h
+++ b/src/lbm_generated/boundary/FreeSlipD3Q19.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
@@ -253,7 +258,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = -1;
element.wnz = 0;
- ref_dir = 1;
+ ref_dir = 2;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -301,7 +306,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = 1;
element.wnz = 0;
- ref_dir = 2;
+ ref_dir = 1;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -349,7 +354,7 @@ class FreeSlipD3Q19
element.wnx = 1;
element.wny = 0;
element.wnz = 0;
- ref_dir = 3;
+ ref_dir = 4;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -397,7 +402,7 @@ class FreeSlipD3Q19
element.wnx = -1;
element.wny = 0;
element.wnz = 0;
- ref_dir = 4;
+ ref_dir = 3;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -445,7 +450,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = 0;
element.wnz = -1;
- ref_dir = 5;
+ ref_dir = 6;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -493,7 +498,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = 0;
element.wnz = 1;
- ref_dir = 6;
+ ref_dir = 5;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -541,7 +546,7 @@ class FreeSlipD3Q19
element.wnx = 1;
element.wny = -1;
element.wnz = 0;
- ref_dir = 7;
+ ref_dir = 10;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -589,7 +594,7 @@ class FreeSlipD3Q19
element.wnx = -1;
element.wny = -1;
element.wnz = 0;
- ref_dir = 8;
+ ref_dir = 9;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -637,7 +642,7 @@ class FreeSlipD3Q19
element.wnx = 1;
element.wny = 1;
element.wnz = 0;
- ref_dir = 9;
+ ref_dir = 8;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -685,7 +690,7 @@ class FreeSlipD3Q19
element.wnx = -1;
element.wny = 1;
element.wnz = 0;
- ref_dir = 10;
+ ref_dir = 7;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -733,7 +738,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = -1;
element.wnz = -1;
- ref_dir = 11;
+ ref_dir = 16;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -781,7 +786,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = 1;
element.wnz = -1;
- ref_dir = 12;
+ ref_dir = 15;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -829,7 +834,7 @@ class FreeSlipD3Q19
element.wnx = 1;
element.wny = 0;
element.wnz = -1;
- ref_dir = 13;
+ ref_dir = 18;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -877,7 +882,7 @@ class FreeSlipD3Q19
element.wnx = -1;
element.wny = 0;
element.wnz = -1;
- ref_dir = 14;
+ ref_dir = 17;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -925,7 +930,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = -1;
element.wnz = 1;
- ref_dir = 15;
+ ref_dir = 12;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -973,7 +978,7 @@ class FreeSlipD3Q19
element.wnx = 0;
element.wny = 1;
element.wnz = 1;
- ref_dir = 16;
+ ref_dir = 11;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1021,7 +1026,7 @@ class FreeSlipD3Q19
element.wnx = 1;
element.wny = 0;
element.wnz = 1;
- ref_dir = 17;
+ ref_dir = 14;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1069,7 +1074,7 @@ class FreeSlipD3Q19
element.wnx = -1;
element.wny = 0;
element.wnz = 1;
- ref_dir = 18;
+ ref_dir = 13;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
index 3364610e..e71e3949 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_freeslipd3q27_even {
-static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void freeslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
@@ -63,16 +63,16 @@ static FUNC_PREFIX void freeslipd3q27_even(const uint8_t * RESTRICT const _data_
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
+ const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 16])) + neighbour_offset_x[dir]) + _stride_pdfs_1*y + _stride_pdfs_1*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 20])) + neighbour_offset_y[dir]) + _stride_pdfs_2*z + _stride_pdfs_2*(*((int32_t * )(& _data_indexVector[32*ctr_0 + 24])) + neighbour_offset_z[dir]) + _stride_pdfs_3**((int32_t * )(& _data_indexVector[32*ctr_0 + 28]))];
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/FreeSlipD3Q27.h b/src/lbm_generated/boundary/FreeSlipD3Q27.h
index 562dfbca..759f7910 100644
--- a/src/lbm_generated/boundary/FreeSlipD3Q27.h
+++ b/src/lbm_generated/boundary/FreeSlipD3Q27.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
@@ -253,7 +258,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = -1;
element.wnz = 0;
- ref_dir = 1;
+ ref_dir = 2;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -301,7 +306,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = 1;
element.wnz = 0;
- ref_dir = 2;
+ ref_dir = 1;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -349,7 +354,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 0;
element.wnz = 0;
- ref_dir = 3;
+ ref_dir = 4;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -397,7 +402,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 0;
element.wnz = 0;
- ref_dir = 4;
+ ref_dir = 3;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -445,7 +450,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = 0;
element.wnz = -1;
- ref_dir = 5;
+ ref_dir = 6;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -493,7 +498,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = 0;
element.wnz = 1;
- ref_dir = 6;
+ ref_dir = 5;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -541,7 +546,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = -1;
element.wnz = 0;
- ref_dir = 7;
+ ref_dir = 10;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -589,7 +594,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = -1;
element.wnz = 0;
- ref_dir = 8;
+ ref_dir = 9;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -637,7 +642,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 1;
element.wnz = 0;
- ref_dir = 9;
+ ref_dir = 8;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -685,7 +690,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 1;
element.wnz = 0;
- ref_dir = 10;
+ ref_dir = 7;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -733,7 +738,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = -1;
element.wnz = -1;
- ref_dir = 11;
+ ref_dir = 16;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -781,7 +786,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = 1;
element.wnz = -1;
- ref_dir = 12;
+ ref_dir = 15;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -829,7 +834,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 0;
element.wnz = -1;
- ref_dir = 13;
+ ref_dir = 18;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -877,7 +882,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 0;
element.wnz = -1;
- ref_dir = 14;
+ ref_dir = 17;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -925,7 +930,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = -1;
element.wnz = 1;
- ref_dir = 15;
+ ref_dir = 12;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -973,7 +978,7 @@ class FreeSlipD3Q27
element.wnx = 0;
element.wny = 1;
element.wnz = 1;
- ref_dir = 16;
+ ref_dir = 11;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1021,7 +1026,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 0;
element.wnz = 1;
- ref_dir = 17;
+ ref_dir = 14;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1069,7 +1074,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 0;
element.wnz = 1;
- ref_dir = 18;
+ ref_dir = 13;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1117,7 +1122,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = -1;
element.wnz = -1;
- ref_dir = 19;
+ ref_dir = 26;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1165,7 +1170,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = -1;
element.wnz = -1;
- ref_dir = 20;
+ ref_dir = 25;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1213,7 +1218,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 1;
element.wnz = -1;
- ref_dir = 21;
+ ref_dir = 24;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1261,7 +1266,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 1;
element.wnz = -1;
- ref_dir = 22;
+ ref_dir = 23;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1309,7 +1314,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = -1;
element.wnz = 1;
- ref_dir = 23;
+ ref_dir = 22;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1357,7 +1362,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = -1;
element.wnz = 1;
- ref_dir = 24;
+ ref_dir = 21;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1405,7 +1410,7 @@ class FreeSlipD3Q27
element.wnx = -1;
element.wny = 1;
element.wnz = 1;
- ref_dir = 25;
+ ref_dir = 20;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
@@ -1453,7 +1458,7 @@ class FreeSlipD3Q27
element.wnx = 1;
element.wny = 1;
element.wnz = 1;
- ref_dir = 26;
+ ref_dir = 19;
}
element.ref_dir = ref_dir;
indexVectorAll.push_back( element );
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.cpp b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
index 268cbf43..b56a975a 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q19.cpp
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_noslipd3q19_even {
-static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void noslipd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
@@ -57,15 +57,16 @@ static FUNC_PREFIX void noslipd3q19_even(const uint8_t * RESTRICT const _data_in
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/NoSlipD3Q19.h b/src/lbm_generated/boundary/NoSlipD3Q19.h
index 933108ee..7541e7a9 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q19.h
+++ b/src/lbm_generated/boundary/NoSlipD3Q19.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.cpp b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
index c38bee81..9adee224 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q27.cpp
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_noslipd3q27_even {
-static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void noslipd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
@@ -57,16 +57,16 @@ static FUNC_PREFIX void noslipd3q27_even(const uint8_t * RESTRICT const _data_in
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/NoSlipD3Q27.h b/src/lbm_generated/boundary/NoSlipD3Q27.h
index 56bbfb06..de0a6a35 100644
--- a/src/lbm_generated/boundary/NoSlipD3Q27.h
+++ b/src/lbm_generated/boundary/NoSlipD3Q27.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.cpp b/src/lbm_generated/boundary/OutflowD3Q19.cpp
index d42cf904..13e14a59 100644
--- a/src/lbm_generated/boundary/OutflowD3Q19.cpp
+++ b/src/lbm_generated/boundary/OutflowD3Q19.cpp
@@ -45,13 +45,13 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_outflowd3q19_even {
-static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void outflowd3q19_even(uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
- const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
+ const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1 };
const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0 };
const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 };
@@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q19_even(const uint8_t * RESTRICT _data_indexV
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
- const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+ const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter;
- *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
- *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
+ *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
+ *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/OutflowD3Q19.h b/src/lbm_generated/boundary/OutflowD3Q19.h
index bb299996..bcde26bf 100644
--- a/src/lbm_generated/boundary/OutflowD3Q19.h
+++ b/src/lbm_generated/boundary/OutflowD3Q19.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
@@ -174,8 +179,8 @@ class OutflowD3Q19
if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 4 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -192,8 +197,8 @@ class OutflowD3Q19
if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 8 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -210,8 +215,8 @@ class OutflowD3Q19
if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 10 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -228,8 +233,8 @@ class OutflowD3Q19
if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 14 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -246,8 +251,8 @@ class OutflowD3Q19
if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 18 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.cpp b/src/lbm_generated/boundary/OutflowD3Q27.cpp
index 8ec9a490..70926d56 100644
--- a/src/lbm_generated/boundary/OutflowD3Q27.cpp
+++ b/src/lbm_generated/boundary/OutflowD3Q27.cpp
@@ -45,13 +45,13 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_outflowd3q27_even {
-static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
+static FUNC_PREFIX void outflowd3q27_even(uint8_t * RESTRICT _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
{
- const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
+ const int32_t f_out_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
const int32_t f_in_inv_offsets_x [] = { 0,0,0,-1,1,0,0,-1,1,-1,1,0,0,-1,1,0,0,-1,1,1,-1,1,-1,1,-1,1,-1 };
const int32_t f_in_inv_offsets_y [] = { 0,1,-1,0,0,0,0,1,1,-1,-1,1,-1,0,0,1,-1,0,0,1,1,-1,-1,1,1,-1,-1 };
const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 };
@@ -64,19 +64,19 @@ static FUNC_PREFIX void outflowd3q27_even(const uint8_t * RESTRICT _data_indexV
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
- const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
+ const int32_t x = *((int32_t * )(& _data_indexVector[32*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[32*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[32*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[32*ctr_0 + 12]));
+ const double pdf_inter = 0.42264973081037427**((double * )(& _data_indexVector[32*ctr_0 + 24])) + 0.57735026918962573**((double * )(& _data_indexVector[32*ctr_0 + 16]));
_data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = pdf_inter;
- *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
- *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
+ *((double * )(& _data_indexVector[32*ctr_0 + 16])) = _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*(neighbour_offset_x[dir] - 1) + _stride_pdfs_1*y + _stride_pdfs_1*neighbour_offset_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*neighbour_offset_z[dir] + _stride_pdfs_3*f_out_inv_dir_idx[dir]];
+ *((double * )(& _data_indexVector[32*ctr_0 + 24])) = pdf_inter;
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
diff --git a/src/lbm_generated/boundary/OutflowD3Q27.h b/src/lbm_generated/boundary/OutflowD3Q27.h
index 53b4e4ba..e62365e4 100644
--- a/src/lbm_generated/boundary/OutflowD3Q27.h
+++ b/src/lbm_generated/boundary/OutflowD3Q27.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
@@ -174,8 +179,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 4 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(0), 3) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -192,8 +197,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 8 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(0), 9) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -210,8 +215,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 10 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(0), 7) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -228,8 +233,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 14 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(1), 17) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -246,8 +251,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 18 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(0), it.z() + cell_idx_c(-1), 13) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -264,8 +269,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 1, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 19 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(1), 26) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -282,8 +287,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, -1, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 21 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(1), 24) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -300,8 +305,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, 1, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 23 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(1), it.z() + cell_idx_c(-1), 22) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
@@ -318,8 +323,8 @@ class OutflowD3Q27
if ( isFlagSet( it.neighbor(1, -1, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 25 );
- element.pdf = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
- element.pdf_nd = pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20);
+ element.pdf = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) );
+ element.pdf_nd = double( pdfs->get(it.x() + cell_idx_c(0), it.y() + cell_idx_c(-1), it.z() + cell_idx_c(-1), 20) );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
diff --git a/src/lbm_generated/boundary/UBBD3Q19.cpp b/src/lbm_generated/boundary/UBBD3Q19.cpp
index 0a88d2fe..7de5a364 100644
--- a/src/lbm_generated/boundary/UBBD3Q19.cpp
+++ b/src/lbm_generated/boundary/UBBD3Q19.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_ubbd3q19_even {
-static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
+static FUNC_PREFIX void ubbd3q19_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13 };
@@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index
const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1 };
- const double weights [] = {0.33333333333333333, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.055555555555555556, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778, 0.027777777777777778};
+ const double weights [] = {((double)(0.33333333333333333)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.055555555555555556)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778)), ((double)(0.027777777777777778))};
@@ -66,15 +66,16 @@ static FUNC_PREFIX void ubbd3q19_even(const uint8_t * RESTRICT const _data_index
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
@@ -101,8 +102,8 @@ void UBBD3Q19::run_impl(IBlock * block, IndexVectors::Type type)
uint8_t timestep = pdfs->getTimestep();
auto & u_y = u_y_;
- auto & u_x = u_x_;
auto & u_z = u_z_;
+ auto & u_x = u_x_;
WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
double * RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
diff --git a/src/lbm_generated/boundary/UBBD3Q19.h b/src/lbm_generated/boundary/UBBD3Q19.h
index f57bac12..592d0000 100644
--- a/src/lbm_generated/boundary/UBBD3Q19.h
+++ b/src/lbm_generated/boundary/UBBD3Q19.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/boundary/UBBD3Q27.cpp b/src/lbm_generated/boundary/UBBD3Q27.cpp
index 08ee3ef3..b2cc6655 100644
--- a/src/lbm_generated/boundary/UBBD3Q27.cpp
+++ b/src/lbm_generated/boundary/UBBD3Q27.cpp
@@ -45,9 +45,9 @@ namespace lbm {
#pragma diag_suppress 177
#endif
#endif
-
+//NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_ubbd3q27_even {
-static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
+static FUNC_PREFIX void ubbd3q27_even(uint8_t * RESTRICT const _data_indexVector, double * RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize, double u_x, double u_y, double u_z)
{
const int32_t f_in_inv_dir_idx [] = { 0,2,1,4,3,6,5,10,9,8,7,16,15,18,17,12,11,14,13,26,25,24,23,22,21,20,19 };
@@ -56,7 +56,7 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index
const int32_t f_in_inv_offsets_z [] = { 0,0,0,0,0,1,-1,0,0,0,0,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1 };
- const double weights [] = {0.29629629629629630, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.074074074074074074, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.018518518518518519, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296, 0.0046296296296296296};
+ const double weights [] = {((double)(0.29629629629629630)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.074074074074074074)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.018518518518518519)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296)), ((double)(0.0046296296296296296))};
@@ -66,16 +66,16 @@ static FUNC_PREFIX void ubbd3q27_even(const uint8_t * RESTRICT const _data_index
for (int64_t ctr_0 = 0; ctr_0 < indexVectorSize; ctr_0 += 1)
{
- const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
- const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
- const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
- const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
- _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = (u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*-1.0*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
+ const int32_t x = *((int32_t * )(& _data_indexVector[16*ctr_0]));
+ const int32_t y = *((int32_t * )(& _data_indexVector[16*ctr_0 + 4]));
+ const int32_t z = *((int32_t * )(& _data_indexVector[16*ctr_0 + 8]));
+ const int32_t dir = *((int32_t * )(& _data_indexVector[16*ctr_0 + 12]));
+ _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_0*f_in_inv_offsets_x[dir] + _stride_pdfs_1*y + _stride_pdfs_1*f_in_inv_offsets_y[dir] + _stride_pdfs_2*z + _stride_pdfs_2*f_in_inv_offsets_z[dir] + _stride_pdfs_3*f_in_inv_dir_idx[dir]] = -(u_x*6.0*((double)(neighbour_offset_x[dir])) + u_y*6.0*((double)(neighbour_offset_y[dir])) + u_z*6.0*((double)(neighbour_offset_z[dir])))*weights[dir] + _data_pdfs[_stride_pdfs_0*x + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir];
}
}
}
-
+//NOLINTEND(readability-non-const-parameter*)
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
@@ -102,8 +102,8 @@ void UBBD3Q27::run_impl(IBlock * block, IndexVectors::Type type)
uint8_t timestep = pdfs->getTimestep();
auto & u_y = u_y_;
- auto & u_x = u_x_;
auto & u_z = u_z_;
+ auto & u_x = u_x_;
WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
double * RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
diff --git a/src/lbm_generated/boundary/UBBD3Q27.h b/src/lbm_generated/boundary/UBBD3Q27.h
index b7836d69..9ce7c3eb 100644
--- a/src/lbm_generated/boundary/UBBD3Q27.h
+++ b/src/lbm_generated/boundary/UBBD3Q27.h
@@ -19,6 +19,7 @@
#pragma once
#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
#include "field/GhostLayerField.h"
#include "domain_decomposition/BlockDataID.h"
@@ -40,6 +41,10 @@
#define RESTRICT
#endif
+#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
+using walberla::half;
+#endif
+
namespace walberla {
namespace lbm {
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
index 1b3e43a5..b74539e9 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h
@@ -54,6 +54,12 @@ class NonuniformPackingKernelsWrapper
void localCopyDirection(PdfField_T* srcField, CellInterval& srcInterval, PdfField_T* dstField,
CellInterval& dstInterval, Direction dir) const = 0;
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+ CellInterval dstInterval, Direction dir) const = 0;
+
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+ PdfField_T* dstField, CellInterval dstInterval, Direction dir) const = 0;
+
void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
stencil::Direction dir) const = 0;
@@ -110,6 +116,18 @@ class NonuniformPackingKernelsWrapper< PdfField_T, false >
kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir);
}
+ // Non-inplace wrapper: no timestep is involved, every argument is handed to the generated kernel unchanged.
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
+ {
+ kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir);
+ }
+
+ // Non-inplace wrapper: no timestep is involved, every argument is handed to the generated kernel unchanged.
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
+ {
+ kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
+ }
+
void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
stencil::Direction dir) const
{
@@ -194,10 +212,33 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true >
kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep);
}
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+ CellInterval dstInterval, Direction dir) const
+ {
+ // The source field's timestep is forwarded to the kernel; only the fine destination field's parity is checked.
+ uint8_t timestep = srcField->getTimestep();
+ WALBERLA_ASSERT((dstField->getTimestep() & 1) != 0, "When the coarse to fine step is executed, the fine Field must "
+ "be on an odd timestep, while the source field could either be on an even or an odd state.")
+ kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep);
+ }
+
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskField* maskField, CellInterval srcInterval,
+ PdfField_T* dstField, CellInterval dstInterval, Direction dir) const
+ {
+ // The destination field's timestep is forwarded to the kernel; only the fine source field's parity is checked.
+ uint8_t timestep = dstField->getTimestep();
+ WALBERLA_ASSERT((srcField->getTimestep() & 1) == 0, "When the fine to coarse step is executed, the fine Field must "
+ "be on an even timestep, while the destination field could either be on an even or an odd state.")
+ kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep);
+ }
+
void unpackRedistribute(PdfField_T* dstField, CellInterval& ci, unsigned char* inBuffer,
stencil::Direction dir) const
{
uint8_t timestep = dstField->getTimestep();
+ // The fine destination field has to be on an odd timestep before the redistributed data is unpacked into it.
+ WALBERLA_ASSERT((timestep & 1) != 0, "When the coarse to fine step is executed, the fine Field must "
+ "be on an odd timestep, while the source field could either be on an even or an odd state.")
kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep);
}
@@ -205,6 +246,9 @@ class NonuniformPackingKernelsWrapper< PdfField_T, true >
unsigned char* outBuffer, Direction dir) const
{
uint8_t timestep = srcField->getTimestep();
+ WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+ "be on an even timestep, while the source field could either be "
+ "on an even or an odd state.")
kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep);
}
diff --git a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
index cf36a61f..7c45d93d 100644
--- a/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
+++ b/src/lbm_generated/communication/NonuniformGeneratedPdfPackInfo.impl.h
@@ -167,19 +167,19 @@ void NonuniformGeneratedPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
Direction unpackDir = dstIntervals[index].first;
CellInterval dstInterval = dstIntervals[index].second;
- uint_t packSize = kernels_.size(srcInterval);
-
#ifndef NDEBUG
Direction const packDir = srcIntervals[index].first;
WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
uint_t unpackSize = kernels_.redistributeSize(dstInterval);
- WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+ WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize)
#endif
// TODO: This is a dirty workaround. Code-generate direct redistribution!
- std::vector< unsigned char > buffer(packSize);
+ std::vector< unsigned char > buffer(kernels_.size(srcInterval));
kernels_.packAll(srcField, srcInterval, &buffer[0]);
kernels_.unpackRedistribute(dstField, dstInterval, &buffer[0], unpackDir);
+
+ // kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir);
}
}
@@ -228,20 +228,20 @@ void walberla::lbm_generated::NonuniformGeneratedPdfPackInfo< PdfField_T >::comm
CellInterval srcInterval;
srcField->getGhostRegion(dir, srcInterval, 2);
- uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
invDir, dstField);
#ifndef NDEBUG
uint_t unpackSize = kernels_.size(dstInterval, invDir);
- WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+ WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize)
#endif
// TODO: This is a dirty workaround. Code-generate direct redistribution!
- std::vector< unsigned char > buffer(packSize);
+ std::vector< unsigned char > buffer(kernels_.partialCoalescenceSize(srcInterval, dir));
kernels_.packPartialCoalescence(srcField, maskField, srcInterval, &buffer[0], dir);
kernels_.unpackCoalescence(dstField, dstInterval, &buffer[0], invDir);
+ // kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir);
}
template< typename PdfField_T>
diff --git a/src/lbm_generated/evaluation/PerformanceEvaluation.h b/src/lbm_generated/evaluation/PerformanceEvaluation.h
index 36f112ac..98134e0d 100644
--- a/src/lbm_generated/evaluation/PerformanceEvaluation.h
+++ b/src/lbm_generated/evaluation/PerformanceEvaluation.h
@@ -92,6 +92,16 @@ class PerformanceEvaluationBase
return c;
}
+ // Accessor for the count held by the cells_ counter.
+ // NOTE(review): presumably the total over all cells, in contrast to numberOfFluidCells() - confirm.
+ uint64_t numberOfCells() const
+ { return cells_.numberOfCells(); }
+
+ // Accessor for the count held by the fluidCells_ counter.
+ // NOTE(review): presumably restricted to cells flagged as fluid - confirm.
+ uint64_t numberOfFluidCells() const
+ { return fluidCells_.numberOfCells(); }
+
double mlups( const uint_t timeSteps, const double time ) const
{
double m( 0.0 );
diff --git a/src/lbm_generated/field/AddToStorage.h b/src/lbm_generated/field/AddToStorage.h
index afb86819..95b0089d 100644
--- a/src/lbm_generated/field/AddToStorage.h
+++ b/src/lbm_generated/field/AddToStorage.h
@@ -41,7 +41,7 @@ class PdfFieldHandling : public field::BlockDataHandling< PdfField;
PdfFieldHandling( const weak_ptr< StructuredBlockStorage > & blocks, const LatticeStorageSpecification_T & storageSpecification,
- const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator > alloc = nullptr ) :
+ const uint_t nrOfGhostLayers, const field::Layout & layout, const shared_ptr< field::FieldAllocator > alloc = nullptr ) :
blocks_( blocks ), storageSpecification_( storageSpecification ),
nrOfGhostLayers_( nrOfGhostLayers ), layout_( layout ), alloc_( alloc ){}
@@ -106,7 +106,7 @@ class PdfFieldHandling : public field::BlockDataHandling< PdfField > alloc_;
+ shared_ptr< field::FieldAllocator > alloc_;
}; // class PdfFieldHandling
@@ -121,10 +121,10 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
const field::Layout & layout = field::fzyx,
const Set & requiredSelectors = Set::emptySet(),
const Set & incompatibleSelectors = Set::emptySet(),
- const shared_ptr< field::FieldAllocator > alloc = nullptr)
+ const shared_ptr< field::FieldAllocator > alloc = nullptr)
{
return blocks->addBlockData( make_shared< internal::PdfFieldHandling< LatticeStorageSpecification_T > >(
- blocks, storageSpecification, ghostLayers, layout, alloc ),
+ blocks, storageSpecification, ghostLayers, layout, alloc ),
identifier, requiredSelectors, incompatibleSelectors );
}
@@ -134,7 +134,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
const field::Layout & layout = field::fzyx,
const Set & requiredSelectors = Set::emptySet(),
const Set & incompatibleSelectors = Set::emptySet(),
- const shared_ptr< field::FieldAllocator > alloc = nullptr)
+ const shared_ptr< field::FieldAllocator > alloc = nullptr)
{
auto ghostLayers = uint_c(1);
@@ -148,7 +148,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
const LatticeStorageSpecification_T & storageSpecification,
const Set & requiredSelectors = Set::emptySet(),
const Set & incompatibleSelectors = Set::emptySet(),
- const shared_ptr< field::FieldAllocator > alloc = nullptr)
+ const shared_ptr< field::FieldAllocator > alloc = nullptr)
{
auto ghostLayers = uint_c(1);
auto layout = field::fzyx;
@@ -161,7 +161,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
const LatticeStorageSpecification_T & storageSpecification,
- const shared_ptr< field::FieldAllocator > alloc = nullptr)
+ const shared_ptr< field::FieldAllocator > alloc = nullptr)
{
auto ghostLayers = uint_c(1);
auto layout = field::fzyx;
@@ -177,7 +177,7 @@ template< typename LatticeStorageSpecification_T, typename BlockStorage_T >
BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, const std::string & identifier,
const LatticeStorageSpecification_T & storageSpecification,
const field::Layout & layout = field::fzyx,
- const shared_ptr< field::FieldAllocator > alloc = nullptr)
+ const shared_ptr< field::FieldAllocator > alloc = nullptr)
{
auto ghostLayers = uint_c(1);
auto requiredSelectors = Set::emptySet();
@@ -193,7 +193,7 @@ BlockDataID addPdfFieldToStorage( const shared_ptr< BlockStorage_T > & blocks, c
const LatticeStorageSpecification_T & storageSpecification,
const uint_t ghostLayers,
const field::Layout & layout,
- const shared_ptr< field::FieldAllocator > alloc)
+ const shared_ptr< field::FieldAllocator > alloc)
{
auto requiredSelectors = Set::emptySet();
auto incompatibleSelectors = Set::emptySet();
diff --git a/src/lbm_generated/field/PdfField.h b/src/lbm_generated/field/PdfField.h
index 6e6b7ee8..2dce8976 100644
--- a/src/lbm_generated/field/PdfField.h
+++ b/src/lbm_generated/field/PdfField.h
@@ -28,7 +28,7 @@
namespace walberla::lbm_generated {
template< typename LatticeStorageSpecification_T >
-class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::Stencil::Size >
+class PdfField : public GhostLayerField< typename LatticeStorageSpecification_T::value_type, LatticeStorageSpecification_T::Stencil::Size >
{
public:
@@ -38,17 +38,17 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::
using LatticeStorageSpecification = LatticeStorageSpecification_T;
using Stencil = typename LatticeStorageSpecification_T::Stencil;
- using value_type = typename GhostLayerField::value_type;
+ using value_type = typename LatticeStorageSpecification_T::value_type;
- using Ptr = typename GhostLayerField::Ptr;
- using ConstPtr = typename GhostLayerField::ConstPtr;
+ using Ptr = typename GhostLayerField::Ptr;
+ using ConstPtr = typename GhostLayerField::ConstPtr;
//@}
//*******************************************************************************************************************
PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
const LatticeStorageSpecification_T & storageSpecification,
const uint_t ghostLayers = uint_t(1), const field::Layout & _layout = field::zyxf,
- const shared_ptr< field::FieldAllocator > & alloc = shared_ptr< field::FieldAllocator >() );
+ const shared_ptr< field::FieldAllocator > & alloc = shared_ptr< field::FieldAllocator >() );
~PdfField() override = default;
@@ -61,19 +61,19 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::
// Access functions (with stencil::Direction!) //
/////////////////////////////////////////////////
- using GhostLayerField< real_t, Stencil::Size >::get;
+ using GhostLayerField< value_type, Stencil::Size >::get;
- real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); }
- const real_t & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
- real_t & get( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
- const real_t & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+ value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); }
+ const value_type & get( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+ value_type & get( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+ const value_type & get( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
- using GhostLayerField< real_t, Stencil::Size >::operator();
+ using GhostLayerField< value_type, Stencil::Size >::operator();
- real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); }
- const real_t & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
- real_t & operator()( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
- const real_t & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+ value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) { return get( x, y, z, Stencil::idx[d] ); }
+ const value_type & operator()( cell_idx_t x, cell_idx_t y, cell_idx_t z, stencil::Direction d ) const { return get( x, y, z, Stencil::idx[d] ); }
+ value_type & operator()( const Cell & c, stencil::Direction d ) { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
+ const value_type & operator()( const Cell & c, stencil::Direction d ) const { return get( c.x(), c.y(), c.z(), Stencil::idx[d] ); }
protected:
@@ -81,7 +81,7 @@ class PdfField : public GhostLayerField< real_t, LatticeStorageSpecification_T::
/*! \name Shallow Copy */
//@{
inline PdfField( const PdfField< LatticeStorageSpecification_T > & other );
- Field< real_t, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); }
+ Field< value_type, Stencil::Size > * cloneShallowCopyInternal() const override { return new PdfField< LatticeStorageSpecification_T >( *this ); }
//@}
//*******************************************************************************************************************
@@ -94,17 +94,17 @@ template< typename LatticeStorageSpecification_T >
PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const uint_t _ySize, const uint_t _zSize,
const LatticeStorageSpecification_T & storageSpecification,
const uint_t ghostLayers, const field::Layout & _layout,
- const shared_ptr< field::FieldAllocator > & alloc ) :
+ const shared_ptr< field::FieldAllocator > & alloc ) :
- GhostLayerField< real_t, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ),
+ GhostLayerField< value_type, Stencil::Size >( _xSize, _ySize, _zSize, ghostLayers, _layout, alloc ),
storageSpecification_( storageSpecification )
{
#ifdef _OPENMP
// take care of proper thread<->memory assignment (first-touch allocation policy !)
- this->setWithGhostLayer( real_t(0) );
+ this->setWithGhostLayer( value_type(0) );
#endif
- this->setWithGhostLayer( real_t(0) );
+ this->setWithGhostLayer( value_type(0) );
}
@@ -112,24 +112,24 @@ PdfField< LatticeStorageSpecification_T >::PdfField( const uint_t _xSize, const
template< typename LatticeStorageSpecification_T >
inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::clone() const
{
- return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::clone() );
+ return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::clone() );
}
template< typename LatticeStorageSpecification_T >
inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneUninitialized() const
{
- return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneUninitialized() );
+ return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneUninitialized() );
}
template< typename LatticeStorageSpecification_T >
inline PdfField< LatticeStorageSpecification_T > * PdfField< LatticeStorageSpecification_T >::cloneShallowCopy() const
{
- return dynamic_cast< PdfField * >( GhostLayerField< real_t, Stencil::Size >::cloneShallowCopy() );
+ return dynamic_cast< PdfField * >( GhostLayerField< value_type, Stencil::Size >::cloneShallowCopy() );
}
template< typename LatticeStorageSpecification_T >
inline PdfField< LatticeStorageSpecification_T >::PdfField( const PdfField< LatticeStorageSpecification_T > & other )
- : GhostLayerField< real_t, Stencil::Size >::GhostLayerField( other )
+ : GhostLayerField< value_type, Stencil::Size >::GhostLayerField( other )
{
}
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
index 8d95855e..ff963ed5 100644
--- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.h
@@ -33,6 +33,7 @@ namespace walberla
{
using gpu::communication::NonUniformGPUScheme;
+using BlockFunction = std::function< void(const uint_t) >; // parameters: level
namespace lbm_generated
{
@@ -67,26 +68,53 @@ class BasicRecursiveTimeStepGPU
"Template parameter PdfField_T is of different type than BlockDataID pdfFieldId that is "
"provided as constructor argument")
#endif
+ useStreams_ = false;
maxLevel_ = sbfs->getDepth();
+ streams_.resize(maxLevel_ + 1);
+ timestepPerLevel_.resize(maxLevel_ + 1);
- for (uint_t level = 0; level <= maxLevel_; level++)
- {
+ for (uint_t level = 0; level <= maxLevel_; level++){
std::vector< Block* > blocks;
sbfs->getBlocks(blocks, level);
blocks_.push_back(blocks);
+ streams_[level].resize(nStreams_);
+ timestepPerLevel_[level] = uint8_c(0);
+ }
+ for (uint_t level = 0; level <= maxLevel_; level++){
+ for (uint_t i = 0; i < nStreams_; i++){
+ streams_[level][i] = nullptr;
+ }
}
};
- ~BasicRecursiveTimeStepGPU() = default;
+ ~BasicRecursiveTimeStepGPU(){
+ // Streams are only created by activateStreams(); without it every entry is still the nullptr set in the constructor.
+ if (!useStreams_) return;
+ for (auto& levelStreams : streams_){
+ for (auto& stream : levelStreams)
+ WALBERLA_GPU_CHECK(gpuStreamDestroy(stream))
+ }
+ }
+
+ void activateStreams(){
+ if (useStreams_) return; // already active: creating the streams again would overwrite and leak the existing handles
+ WALBERLA_LOG_INFO_ON_ROOT("Updating blocks using " << nStreams_ << " GPU Streams")
+ for (auto& levelStreams : streams_) // one row per refinement level, nStreams_ entries each (sized in the constructor)
+ for (auto& stream : levelStreams)
+ WALBERLA_GPU_CHECK(gpuStreamCreate(&stream))
+ useStreams_ = true; // tells the destructor that the handles must be destroyed
+ }
+
void operator()() { timestep(0); };
void addRefinementToTimeLoop(SweepTimeloop& timeloop, uint_t level = 0);
- void test(uint_t maxLevel, uint_t level = 0);
+ void addPostBoundaryHandlingBlockFunction( const BlockFunction & function );
private:
void timestep(uint_t level);
void ghostLayerPropagation(Block* block, gpuStream_t gpuStream);
std::function< void() > executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation = false);
+ std::function< void() > executePostBoundaryBlockFunctions(uint_t level);
std::function< void() > executeBoundaryHandlingOnLevel(uint_t level);
@@ -100,6 +128,12 @@ class BasicRecursiveTimeStepGPU
SweepCollection_T& sweepCollection_;
BoundaryCollection_T& boundaryCollection_;
+ std::vector< BlockFunction > globalPostBoundaryHandlingBlockFunctions_;
+
+ std::vector< std::vector< gpuStream_t >> streams_;
+ uint_t nStreams_{uint_c(6)};
+ bool useStreams_;
+ std::vector< uint8_t > timestepPerLevel_;
};
} // namespace lbm_generated
diff --git a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
index 6665cf35..0327b9b5 100644
--- a/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
+++ b/src/lbm_generated/gpu/BasicRecursiveTimeStepGPU.impl.h
@@ -28,18 +28,13 @@ namespace lbm_generated {
template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::timestep(uint_t level)
{
- std::vector blocks;
- sbfs_->getBlocks(blocks, level);
-
- uint_t maxLevel = sbfs_->getDepth();
-
// 1.1 Collision
- for(auto b: blocks){
+ for(auto b: blocks_[level]){
sweepCollection_.streamCollide(b);
}
// 1.2 Recursive Descent
- if(level < maxLevel){
+ if(level < maxLevel_){
timestep(level + 1);
}
@@ -52,13 +47,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
commScheme_->communicateEqualLevel(level);
// 1.5 Boundary Handling and Coalescence Preparation
- for(auto b : blocks){
+ for(auto b : blocks_[level]){
boundaryCollection_(b, nullptr);
- if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+ if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
}
// 1.6 Fine to Coarse Communication, receiving end
- if(level < maxLevel){
+ if(level < maxLevel_){
commScheme_->communicateFineToCoarse(level + 1);
}
@@ -67,13 +62,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
if(level == 0) return;
// 2.1 Collision and Ghost-Layer Propagation
- for(auto b: blocks){
+ for(auto b: blocks_[level]){
ghostLayerPropagation(b); // GL-Propagation first without swapping arrays...
sweepCollection_.streamCollide(b); // then Stream-Collide on interior, and swap arrays
}
// 2.2 Recursive Descent
- if(level < maxLevel){
+ if(level < maxLevel_){
timestep(level + 1);
}
@@ -81,13 +76,13 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
commScheme_->communicateEqualLevel(level);
// 2.5 Boundary Handling and Coalescence Preparation
- for(auto b : blocks){
+ for(auto b : blocks_[level]){
boundaryCollection_(b, nullptr);
- if(level != maxLevel) pdfFieldPackInfo_->prepareCoalescence(b);
+ if(level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b);
}
// 2.6 Fine to Coarse Communication, receiving end
- if(level < maxLevel){
+ if(level < maxLevel_){
commScheme_->communicateFineToCoarse(level + 1);
}
}
@@ -115,6 +110,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
// 1.5 Boundary Handling and Coalescence Preparation
timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+ timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level));
// 1.6 Fine to Coarse Communication, receiving end
if(level < maxLevel_){
@@ -138,6 +134,7 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
// 2.5 Boundary Handling and Coalescence Preparation
timeloop.addFuncBeforeTimeStep(executeBoundaryHandlingOnLevel(level), "Refinement Cycle: boundary handling on level " + std::to_string(level));
+ timeloop.addFuncBeforeTimeStep(executePostBoundaryBlockFunctions(level), "Refinement Cycle: post boundary handling block functions on level " + std::to_string(level));
// 2.6 Fine to Coarse Communication, receiving end
if(level < maxLevel_)
@@ -145,91 +142,65 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
}
-template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
-void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::test(uint_t maxLevel, uint_t level)
-{
- // 1.1 Collision
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide on level " + std::to_string(level));
-
- // 1.2 Recursive Descent
- if(level < maxLevel){
- test(maxLevel, level + 1);
- }
-
- // 1.3 Coarse to Fine Communication, receiving end
- if(level != 0){
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate coarse to fine on level " + std::to_string(level));
- }
-
- // 1.4 Equal-Level Communication
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
-
-
- // 1.5 Boundary Handling and Coalescence Preparation
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
-
- // 1.6 Fine to Coarse Communication, receiving end
- if(level < maxLevel){
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
- }
-
- // Stop here if on coarsest level.
- // Otherwise, continue to second subcycle.
- if(level == 0) return;
-
- // 2.1 Collision and Ghost-Layer Propagation
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: streamCollide with ghost layer propagation on level " + std::to_string(level));
-
- // 2.2 Recursive Descent
- if(level < maxLevel)
- test(maxLevel, level + 1);
-
-
- // 2.4 Equal-Level Communication
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate equal level on level " + std::to_string(level));
-
- // 2.5 Boundary Handling and Coalescence Preparation
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: boundary handling on level " + std::to_string(level));
-
- // 2.6 Fine to Coarse Communication, receiving end
- if(level < maxLevel)
- WALBERLA_LOG_INFO_ON_ROOT("Refinement Cycle: communicate fine to coarse on level " + std::to_string(level + 1));
-
-}
-
template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
std::function BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeStreamCollideOnLevel(uint_t level, bool withGhostLayerPropagation)
{
- return [level, withGhostLayerPropagation, this]()
- {
- if (withGhostLayerPropagation)
- {
- for(auto b: blocks_[level]){
- ghostLayerPropagation(b, nullptr);
- sweepCollection_.streamCollide(b, 0, nullptr);
+ if(sweepCollection_.blockWise()){ // block-wise mode: the sweep collection is invoked once per level, not once per block
+ return [level, withGhostLayerPropagation, this](){
+ if (withGhostLayerPropagation){
+ const uint8_t timestepPlusOne = (timestepPerLevel_[level] + 1) & 1; // parity (0 or 1) that follows the level's current one
+ sweepCollection_.ghostLayerPropagation(level, timestepPlusOne);
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) // ghost layer propagation is synchronized before stream-collide is issued
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
+ timestepPerLevel_[level] = (timestepPerLevel_[level] + 1) & 1; // flip the parity tracked for this level
+ sweepCollection_.streamCollideOverBlocks(level, timestepPerLevel_[level]);
+ for (uint_t i = 0; i < blocks_[level].size(); i++){
+ auto pdfs = blocks_[level][i]->getData< PdfField_T >(pdfFieldId_);
+ pdfs->advanceTimestep(); // presumably keeps each field's own timestep consistent with timestepPerLevel_ - confirm
+ }
+ commScheme_->setTimestepForLevel(level, timestepPerLevel_[level]); // hand the new parity to the communication scheme
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
}
- }
- else
- {
- for(auto b: blocks_[level]){
- sweepCollection_.streamCollide(b, 0, nullptr);
+ else{
+ timestepPerLevel_[level] = (timestepPerLevel_[level] + 1) & 1; // flip the parity tracked for this level
+ sweepCollection_.streamCollideOverBlocks(level, timestepPerLevel_[level]);
+ for (uint_t i = 0; i < blocks_[level].size(); i++){
+ auto pdfs = blocks_[level][i]->getData< PdfField_T >(pdfFieldId_);
+ pdfs->advanceTimestep(); // presumably keeps each field's own timestep consistent with timestepPerLevel_ - confirm
+ }
+ commScheme_->setTimestepForLevel(level, timestepPerLevel_[level]); // hand the new parity to the communication scheme
+ WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
+ WALBERLA_GPU_CHECK(gpuPeekAtLastError())
}
- }
- WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
- };
+ };
+ }
+ else{ // per-block mode: kernels are issued on streams_[level][i % nStreams_] (all nullptr until activateStreams() is called)
+ return [level, withGhostLayerPropagation, this](){
+ if (withGhostLayerPropagation){
+ for (uint_t i = 0; i < blocks_[level].size(); i++){
+ ghostLayerPropagation(blocks_[level][i], streams_[level][i % nStreams_]);
+ sweepCollection_.streamCollide(blocks_[level][i], 0, streams_[level][i % nStreams_]);
+ }
+ }
+ else{
+ for (uint_t i = 0; i < blocks_[level].size(); i++){
+ sweepCollection_.streamCollide(blocks_[level][i], 0, streams_[level][i % nStreams_]);
+ }
+ }
+ }; // NOTE(review): unlike the removed code, this path no longer calls gpuDeviceSynchronize() - confirm no caller relies on it
+ }
}
template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
std::function BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executeBoundaryHandlingOnLevel(uint_t level)
{
return [this, level]() {
- for (auto b : blocks_[level])
- {
- boundaryCollection_(b, nullptr);
- if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(b, nullptr);
+ for (uint_t i = 0; i < blocks_[level].size(); i++){ // index loop so that block i can be mapped to stream i % nStreams_
+ boundaryCollection_(blocks_[level][i], streams_[level][i % nStreams_]); // stream is nullptr until activateStreams() is called
+ if (level != maxLevel_) pdfFieldPackInfo_->prepareCoalescence(blocks_[level][i], streams_[level][i % nStreams_]); // skipped on the finest level; NOTE(review): no device synchronization follows any more - confirm
}
- WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
};
}
@@ -251,5 +222,23 @@ void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollectio
}
}
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+std::function< void() > BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::executePostBoundaryBlockFunctions(uint_t level)
+{
+ // Builds a callable that runs every registered post-boundary function, in registration order, with this level.
+ return [this, level]() { // captures this: the callable must not outlive the time step object
+ for( const auto& func : globalPostBoundaryHandlingBlockFunctions_ )
+ func(level);
+ };
+}
+
+
+template< typename PdfField_T, typename SweepCollection_T, typename BoundaryCollection_T >
+inline void BasicRecursiveTimeStepGPU< PdfField_T, SweepCollection_T, BoundaryCollection_T >::addPostBoundaryHandlingBlockFunction( const BlockFunction & function ) {
+ // Appends a copy of the callback; executePostBoundaryBlockFunctions() later calls the stored callbacks in this order.
+ globalPostBoundaryHandlingBlockFunctions_.push_back( function );
+}
+
+
} // namespace lbm_generated
} // namespace walberla
diff --git a/src/lbm_generated/gpu/GPUPdfField.h b/src/lbm_generated/gpu/GPUPdfField.h
index 1a9f59a1..f67a84fe 100644
--- a/src/lbm_generated/gpu/GPUPdfField.h
+++ b/src/lbm_generated/gpu/GPUPdfField.h
@@ -28,7 +28,7 @@ using namespace walberla::gpu;
namespace walberla::lbm_generated {
template< typename LatticeStorageSpecification_T >
-class GPUPdfField : public GPUField< real_t >
+class GPUPdfField : public GPUField< typename LatticeStorageSpecification_T::value_type >
{
public:
@@ -38,7 +38,7 @@ class GPUPdfField : public GPUField< real_t >
using LatticeStorageSpecification = LatticeStorageSpecification_T;
using Stencil = typename LatticeStorageSpecification_T::Stencil;
- using value_type = typename GPUField::value_type;
+ using value_type = typename LatticeStorageSpecification_T::value_type;
//@}
//*******************************************************************************************************************
@@ -59,7 +59,7 @@ template< typename LatticeStorageSpecification_T >
GPUPdfField< LatticeStorageSpecification_T >::GPUPdfField( uint_t _xSize, uint_t _ySize, uint_t _zSize,
const LatticeStorageSpecification_T & storageSpecification,
uint_t ghostLayers, const Layout & layout, bool usePitchedMem) :
- GPUField< real_t>( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification )
+ GPUField( _xSize, _ySize, _zSize, LatticeStorageSpecification_T::Stencil::Size, ghostLayers, layout, usePitchedMem ), storageSpecification_( storageSpecification )
{
}
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
index 585d1db3..a780bf48 100644
--- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h
@@ -42,6 +42,7 @@ template< typename PdfField_T, bool inplace >
class NonuniformGPUPackingKernelsWrapper
{
public:
+ using value_type = typename PdfField_T::value_type;
void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream ) const = 0;
void unpackAll(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, gpuStream_t stream ) const = 0;
void localCopyAll(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
@@ -49,8 +50,15 @@ class NonuniformGPUPackingKernelsWrapper
void packDirection(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, Direction dir, gpuStream_t stream ) const = 0;
void unpackDirection(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer, Direction dir, gpuStream_t stream ) const = 0;
- void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
- CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
+ void localCopyDirection(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
+ void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array& sizes, std::array& strides) const = 0;
+
+
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+ CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
+
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+ PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const = 0;
void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
stencil::Direction dir, gpuStream_t stream ) const = 0;
@@ -64,6 +72,8 @@ class NonuniformGPUPackingKernelsWrapper
uint_t size(CellInterval ci) const = 0;
uint_t redistributeSize(CellInterval ci) const = 0;
uint_t partialCoalescenceSize(CellInterval ci, Direction dir) const = 0;
+
+ bool blockWise() const = 0;
};
/*
@@ -75,6 +85,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
public:
using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+ using value_type = typename PdfField_T::value_type;
void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
{
@@ -108,6 +119,23 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, stream);
}
+ void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t /*timestep*/, gpuStream_t stream, std::array& sizes, std::array& strides) const // timestep parameter is unnamed and unused in this specialization
+ {
+ kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, stream, sizes, strides); // forwards every argument except the timestep
+ }
+
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+ CellInterval dstInterval, Direction dir, gpuStream_t stream) const // this specialization passes no timestep
+ {
+ kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, stream); // pass-through to the packing kernels object
+ }
+
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+ PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const // this specialization passes no timestep
+ {
+ kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream); // pass-through to the packing kernels object
+ }
+
void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
stencil::Direction dir, gpuStream_t stream = nullptr) const
{
@@ -138,6 +166,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, false >
return kernels_.partialCoalescenceSize(ci, dir);
}
+ bool blockWise() const {return kernels_.blockWise;} // reports the blockWise member of the packing kernels object
+
private:
PackingKernels_T kernels_;
};
@@ -151,6 +181,7 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
public:
using LatticeStorageSpecification_T = typename PdfField_T::LatticeStorageSpecification;
using PackingKernels_T = typename LatticeStorageSpecification_T::PackKernels;
+ using value_type = typename PdfField_T::value_type;
void packAll(PdfField_T* srcField, CellInterval ci, unsigned char* outBuffer, gpuStream_t stream = nullptr) const
{
@@ -192,10 +223,39 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
kernels_.localCopyDirection(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
}
+ void blockLocalCopyDirection(value_type** data_pdfs_src_dp, value_type** data_pdfs_dst_dp, Direction dir, uint8_t timestep, gpuStream_t stream, std::array& sizes, std::array& strides) const // here the caller supplies the timestep explicitly
+ {
+ kernels_.localCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, dir, timestep, stream, sizes, strides); // timestep is forwarded between dir and stream
+ }
+
+
+ void localCopyRedistribute(PdfField_T* srcField, CellInterval srcInterval, PdfField_T* dstField,
+ CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+ { // the assertion below holds only when the destination timestep is odd
+ uint8_t timestep = srcField->getTimestep(); // the kernel receives the source field's timestep
+ WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+ "be on an odd timestep, while the source field could either be "
+ "on an even or an odd state.")
+ kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+ }
+
+ void localPartialCoalescence(PdfField_T* srcField, PartialCoalescenceMaskFieldGPU* maskField, CellInterval srcInterval,
+ PdfField_T* dstField, CellInterval dstInterval, Direction dir, gpuStream_t stream) const
+ { // the assertion below holds only when the source timestep is even
+ uint8_t timestep = dstField->getTimestep(); // the kernel receives the destination field's timestep
+ WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+ "be on an even timestep, while the destination field could either be "
+ "on an even or an odd state.")
+ kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, timestep, stream);
+ }
+
void unpackRedistribute(PdfField_T* dstField, CellInterval ci, unsigned char* inBuffer,
stencil::Direction dir, gpuStream_t stream = nullptr) const
{
uint8_t timestep = dstField->getTimestep();
+ WALBERLA_ASSERT(!((dstField->getTimestep() & 1) ^ 1), "When the coarse to fine step is executed, the fine Field must "
+ "be on an odd timestep, while the source field could either be "
+ "on an even or an odd state.") // condition is true only for an odd destination timestep
kernels_.unpackRedistribute(dstField, ci, inBuffer, dir, timestep, stream);
}
@@ -203,6 +263,9 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
unsigned char* outBuffer, Direction dir, gpuStream_t stream = nullptr) const
{
uint8_t timestep = srcField->getTimestep();
+ WALBERLA_ASSERT((srcField->getTimestep() & 1) ^ 1, "When the fine to coarse step is executed, the fine Field must "
+ "be on an even timestep, while the source field could either be "
+ "on an even or an odd state.")
kernels_.packPartialCoalescence(srcField, maskField, ci, outBuffer, dir, timestep, stream);
}
@@ -226,6 +289,8 @@ class NonuniformGPUPackingKernelsWrapper< PdfField_T, true >
return kernels_.partialCoalescenceSize(ci, dir);
}
+ bool blockWise() const {return kernels_.blockWise;} // reports the blockWise member of the packing kernels object
+
private:
PackingKernels_T kernels_;
};
@@ -243,17 +308,52 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
using Stencil = typename LatticeStorageSpecification_T::Stencil;
using CommunicationStencil = typename LatticeStorageSpecification_T::CommunicationStencil;
using CommData_T = NonuniformGPUCommData< LatticeStorageSpecification_T >;
+ using value_type = typename PdfField_T::value_type;
+
+ NonuniformGeneratedGPUPdfPackInfo(const uint64_t meshLevels, const BlockDataID pdfFieldID, const BlockDataID commDataID) // meshLevels is only used to size the equal-level tables via init()
+ : pdfFieldID_(pdfFieldID), commDataID_(commDataID){ init(meshLevels); }; // stores both block data IDs, then calls init
+
+ void init(const uint64_t meshLevels){ // sizes the four equal-level tables; called from the constructor
+ auto size = meshLevels * Stencil::Q; // one slot per (level, direction), addressed elsewhere as level * Stencil::Q + dir
+ equalCommSRC.resize(size); // NOTE(review): slots are only valid while dir < Stencil::Q - confirm for every direction value that is passed in
+ equalCommDST.resize(size);
+ equalCommSRCGPU.resize(size);
+ equalCommDSTGPU.resize(size);
- NonuniformGeneratedGPUPdfPackInfo(const BlockDataID pdfFieldID, const BlockDataID commDataID)
- : pdfFieldID_(pdfFieldID), commDataID_(commDataID){};
+ }
+
+ void sync() override { // (re)builds the device-side pointer tables from the host-side pointer lists
+ for (uint_t i = 0; i < equalCommSRC.size(); i++){
+ for (auto const& x : equalCommSRC[i]){
+ auto key = x.first;
+ WALBERLA_GPU_CHECK(gpuFree(equalCommSRCGPU[i][key])); // release the table of an earlier sync() so it is not leaked; assumes a not-yet-filled entry is a null pointer (free is then a no-op) - confirm
+ WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommSRCGPU[i][key], sizeof(value_type*) * equalCommSRC[i][key].size()));
+ WALBERLA_GPU_CHECK(gpuMemcpy(equalCommSRCGPU[i][key], &equalCommSRC[i][key][0],sizeof(value_type*) * equalCommSRC[i][key].size(), gpuMemcpyHostToDevice));
+ WALBERLA_GPU_CHECK(gpuFree(equalCommDSTGPU[i][key])); // same for the destination table
+ WALBERLA_GPU_CHECK(gpuMalloc((void**) &equalCommDSTGPU[i][key], sizeof(value_type*) * equalCommDST[i][key].size()));
+ WALBERLA_GPU_CHECK(gpuMemcpy(equalCommDSTGPU[i][key], &equalCommDST[i][key][0],sizeof(value_type*) * equalCommDST[i][key].size(), gpuMemcpyHostToDevice));
+ }
+ }
+ }
+
+ ~NonuniformGeneratedGPUPdfPackInfo() { // frees the device pointer tables that sync() allocates
+ for (uint_t i = 0; i < equalCommSRC.size(); i++){
+ for (auto const& x : equalCommSRC[i]){ // the host-side source table supplies the keys for both device tables
+ auto key = x.first;
+ WALBERLA_GPU_CHECK(gpuFree(equalCommSRCGPU[i][key])) // NOTE(review): if sync() never ran, this lookup presumably yields a null pointer - confirm the free is harmless then
+ WALBERLA_GPU_CHECK(gpuFree(equalCommDSTGPU[i][key])) // NOTE(review): raw device pointers are owned here; copying this object would presumably free them twice - confirm copies are not made
+ }
+ }
+ }
bool constantDataExchange() const override { return true; };
bool threadsafeReceiving() const override { return false; };
/// Equal Level
void unpackDataEqualLevel(Block* receiver, Direction dir, GpuBuffer_T& buffer, gpuStream_t stream) override;
- void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir,
- gpuStream_t stream) override;
+ void addForLocalEqualLevelComm(const Block* sender, Block* receiver, stencil::Direction dir) override;
+ void communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) override;
+ void communicateLocalEqualLevel(const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream) override;
/// Coarse to Fine
void unpackDataCoarseToFine(Block* fineReceiver, const BlockID& coarseSender, stencil::Direction dir,
@@ -291,7 +391,7 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
bool areNeighborsInDirection(const Block* block, const BlockID& neighborID,
Vector3< cell_idx_t > dirVec) const;
- CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > dirVec,
+ CellInterval intervalHullInDirection(const CellInterval& ci, Vector3< cell_idx_t > tangentialDir,
cell_idx_t width) const;
bool skipsThroughCoarseBlock(const Block* block, Direction dir) const;
@@ -306,6 +406,14 @@ class NonuniformGeneratedGPUPdfPackInfo : public walberla::gpu::GeneratedNonUnif
const BlockDataID pdfFieldID_;
internal::NonuniformGPUPackingKernelsWrapper< PdfField_T, LatticeStorageSpecification_T::inplace > kernels_;
+ std::array strides;
+
+ std::vector,std::vector>> equalCommSRC;
+ std::vector,std::vector>> equalCommDST;
+
+ std::vector,value_type **>> equalCommSRCGPU;
+ std::vector,value_type **>> equalCommDSTGPU;
+
public:
const BlockDataID commDataID_;
};
diff --git a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
index 7ff9c7fd..3f0b0ad5 100644
--- a/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
+++ b/src/lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.impl.h
@@ -55,7 +55,7 @@ std::shared_ptr< NonuniformGeneratedGPUPdfPackInfo< PdfField_T > >
auto handling = std::make_shared >(blocks);
BlockDataID commDataID = sbf->addBlockData(handling, dataIdentifier);
- return std::make_shared >(pdfFieldID, commDataID);
+ return std::make_shared >(sbf->getNumberOfLevels(), pdfFieldID, commDataID);
}
@@ -81,6 +81,9 @@ template< typename PdfField_T>
void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(
const Block* sender, Block* receiver, stencil::Direction dir, gpuStream_t stream)
{
+ if(kernels_.blockWise())
+ return;
+
auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
@@ -92,6 +95,57 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel
kernels_.localCopyDirection(srcField, srcRegion, dstField, dstRegion, dir, stream);
}
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::addForLocalEqualLevelComm( // records one sender/receiver pair for the batched equal-level copy
+ const Block* sender, Block* receiver, stencil::Direction dir)
+{
+ if(!kernels_.blockWise()) // nothing is recorded unless the kernels report blockWise
+ return;
+ const uint_t level = sender->getLevel();
+ auto srcField = const_cast< Block* >(sender)->getData< PdfField_T >(pdfFieldID_);
+ auto dstField = receiver->getData< PdfField_T >(pdfFieldID_);
+
+ CellInterval srcRegion;
+ CellInterval dstRegion;
+ cell_idx_t gls = skipsThroughCoarseBlock(sender, dir) ? 2 : 1; // region thickness: 2 when skipsThroughCoarseBlock is true, otherwise 1
+ srcField->getSliceBeforeGhostLayer(dir, srcRegion, gls, false);
+ dstField->getGhostRegion(stencil::inverseDir[dir], dstRegion, gls, false); // receiver side uses the inverse direction
+
+ strides[0] = int64_t(srcField->xStride()); // NOTE(review): strides is a single member overwritten on every call and later shared by all pairs -
+ strides[1] = int64_t(srcField->yStride()); // presumably all fields have identical strides; confirm
+ strides[2] = int64_t(srcField->zStride());
+ strides[3] = int64_t(1 * int64_t(srcField->fStride()));
+
+ value_type* data_pdfs_dst = dstField->dataAt(dstRegion.xMin(), dstRegion.yMin(), dstRegion.zMin(), 0); // pointer to the minimum corner of the region, f index 0
+ value_type* data_pdfs_src = srcField->dataAt(srcRegion.xMin(), srcRegion.yMin(), srcRegion.zMin(), 0);
+
+ const uint_t index = level * Stencil::Q + dir; // NOTE(review): stays inside the tables sized in init() only while dir < Stencil::Q - confirm
+ Vector3 size(int64_c(srcRegion.xSize()), int64_c(srcRegion.ySize()), int64_c(srcRegion.zSize())); // extent of the source region, used as the grouping key
+
+ equalCommDST[index][size].emplace_back(data_pdfs_dst); // pairs with the same region extent end up in the same list
+ equalCommSRC[index][size].emplace_back(data_pdfs_src); // source and destination lists are appended in lockstep
+}
+
+
+template< typename PdfField_T>
+void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalEqualLevel(uint64_t level, uint8_t timestep, gpuStream_t stream) // batched variant: one kernel call per direction and region extent
+{
+ if(!kernels_.blockWise()) // only active when the kernels report blockWise
+ return;
+
+ for (auto dir = CommunicationStencil::beginNoCenter(); dir != CommunicationStencil::end(); ++dir){ // all communication directions except the center
+ const uint_t index = level * Stencil::Q + *dir; // same slot layout as in addForLocalEqualLevelComm
+ for (auto const& x : equalCommSRC[index]){ // one entry per distinct region extent recorded for this slot
+ auto key = x.first;
+ value_type** data_pdfs_src_dp = equalCommSRCGPU[index][key]; // device tables are filled by sync(); NOTE(review): presumably sync() must have run before this call - confirm
+ value_type** data_pdfs_dst_dp = equalCommDSTGPU[index][key];
+ std::array< int64_t, 4 > size = { int64_c(equalCommSRC[index][key].size()), key[0], key[1], key[2] }; // {number of recorded pointers, then the three components of the extent key}
+
+ kernels_.blockLocalCopyDirection(data_pdfs_src_dp, data_pdfs_dst_dp, *dir, timestep, stream, size, strides); // strides is the member last written by addForLocalEqualLevelComm
+ }
+ }
+}
+
template< typename PdfField_T>
void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::packDataEqualLevelImpl(
@@ -168,21 +222,13 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFi
Direction const unpackDir = dstIntervals[index].first;
CellInterval dstInterval = dstIntervals[index].second;
- uint_t packSize = kernels_.size(srcInterval);
-
#ifndef NDEBUG
Direction const packDir = srcIntervals[index].first;
WALBERLA_ASSERT_EQUAL(packDir, stencil::inverseDir[unpackDir])
uint_t unpackSize = kernels_.redistributeSize(dstInterval);
- WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+ WALBERLA_ASSERT_EQUAL(kernels_.size(srcInterval), unpackSize)
#endif
-
- // TODO: This is a dirty workaround. Code-generate direct redistribution!
- unsigned char *buffer;
- WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
- kernels_.packAll(srcField, srcInterval, buffer, stream);
- kernels_.unpackRedistribute(dstField, dstInterval, buffer, unpackDir, stream);
- WALBERLA_GPU_CHECK(gpuFree(buffer))
+ kernels_.localCopyRedistribute(srcField, srcInterval, dstField, dstInterval, unpackDir, stream);
}
}
@@ -190,6 +236,9 @@ template< typename PdfField_T>
void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalCoarseToFine(
const Block* coarseSender, Block* fineReceiver, stencil::Direction dir, GpuBuffer_T & buffer, gpuStream_t stream)
{
+ // WARNING: This function uses an inplace buffer array.
+ // If possible the direct communicateLocalCoarseToFine without buffer array should be used
+
auto srcField = const_cast< Block* >(coarseSender)->getData< PdfField_T >(pdfFieldID_);
auto dstField = fineReceiver->getData< PdfField_T >(pdfFieldID_);
@@ -269,22 +318,16 @@ void NonuniformGeneratedGPUPdfPackInfo< PdfField_T >::communicateLocalFineToCoar
CellInterval srcInterval;
srcField->getGhostRegion(dir, srcInterval, 2);
- uint_t packSize = kernels_.partialCoalescenceSize(srcInterval, dir);
CellInterval dstInterval = getCoarseBlockCoalescenceInterval(coarseReceiver, fineSender->getId(),
invDir, dstField);
#ifndef NDEBUG
uint_t unpackSize = kernels_.size(dstInterval, invDir);
- WALBERLA_ASSERT_EQUAL(packSize, unpackSize)
+ WALBERLA_ASSERT_EQUAL(kernels_.partialCoalescenceSize(srcInterval, dir), unpackSize)
#endif
- // TODO: This is a dirty workaround. Code-generate direct redistribution!
- unsigned char *buffer;
- WALBERLA_GPU_CHECK( gpuMalloc( &buffer, packSize))
- kernels_.packPartialCoalescence(srcField, maskField, srcInterval, buffer, dir, stream);
- kernels_.unpackCoalescence(dstField, dstInterval, buffer, invDir, stream);
- WALBERLA_GPU_CHECK(gpuFree(buffer))
+ kernels_.localPartialCoalescence(srcField, maskField, srcInterval, dstField, dstInterval, dir, stream);
}
@@ -425,7 +468,7 @@ inline Vector3< cell_idx_t >
}
/**
- * Returns the part of a cell interval's hull of given \p width in direction \p dirVec.
+ * Returns the part of a cell interval's hull of given width in direction dirVec.
* @param ci The original cell interval
* @param dirVec Direction Vector
* @param width Width of the hull
diff --git a/src/lbm_generated/refinement/RefinementScaling.h b/src/lbm_generated/refinement/RefinementScaling.h
index f8015946..abee51e7 100644
--- a/src/lbm_generated/refinement/RefinementScaling.h
+++ b/src/lbm_generated/refinement/RefinementScaling.h
@@ -20,44 +20,18 @@
#pragma once
-#include "blockforest/BlockDataHandling.h"
+#include "core/DataTypes.h"
-#include "domain_decomposition/IBlock.h"
-#include "domain_decomposition/StructuredBlockStorage.h"
-
-namespace walberla
-{
-namespace lbm_generated
+namespace walberla::lbm_generated
{
-class DefaultRefinementScaling : public blockforest::AlwaysInitializeBlockDataHandling< real_t >
+inline real_t relaxationRateScaling( real_t relaxationRate, uint_t refinementLevel ) // returns the relaxation rate rescaled for the given refinement level
{
- public:
- DefaultRefinementScaling(const weak_ptr< StructuredBlockStorage >& blocks, const real_t parameter)
- : blocks_(blocks), parameter_(parameter){};
-
- real_t* initialize(IBlock* const block) override
- {
- WALBERLA_ASSERT_NOT_NULLPTR(block)
- auto blocks = blocks_.lock();
- WALBERLA_CHECK_NOT_NULLPTR(blocks)
-
- level_ = block->getBlockStorage().getLevel(*block);
-
- const real_t level_scale_factor = real_c(uint_t(1) << level_);
- const real_t one = real_c(1.0);
- const real_t half = real_c(0.5);
-
- return new real_t(parameter_ / (level_scale_factor * (-parameter_ * half + one) + parameter_ * half));
- }
- bool operator==(const DefaultRefinementScaling& other) const { return level_ == other.level_; }
-
- private:
- const weak_ptr< StructuredBlockStorage > blocks_;
- const real_t parameter_;
+ const real_t levelScaleFactor = real_c(uint_c(1) << refinementLevel); // 2^refinementLevel
+ const real_t one = real_c(1.0);
+ const real_t half = real_c(0.5);
- uint_t level_;
-};
+ return real_c(relaxationRate / (levelScaleFactor * (-relaxationRate * half + one) + relaxationRate * half)); // rate / (2^level * (1 - rate/2) + rate/2); for level 0 this reduces to the input rate
+}
-} // namespace lbm_generated
-} // namespace walberla
\ No newline at end of file
+} // namespace walberla::lbm_generated
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
index f36797ee..8a080f86 100644
--- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.cpp
@@ -27,257 +27,108 @@
# pragma GCC diagnostic ignored "-Wunused-variable"
#endif
-/*************************************************************************************
+namespace walberla {
+namespace lbm {
+
+ /*************************************************************************************
* Kernel Definitions
*************************************************************************************/
-namespace internal_d3q19storagespecification_pack_ALL {
+ namespace internal_d3q19storagespecification_pack_ALL {
static FUNC_PREFIX void d3q19storagespecification_pack_ALL(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[19*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 19*_size_pdfs_src_2*ctr_1 + 19*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[19*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 19*_size_pdfs_src_0*ctr_1 + 19*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_ALL {
-static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+ namespace internal_d3q19storagespecification_unpack_ALL {
+static FUNC_PREFIX void d3q19storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2];
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 1];
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 2];
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 3];
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 4];
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 5];
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 6];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 7];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 8];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 9];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 10];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 11];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 12];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 13];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 14];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 15];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 16];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 17];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 19*_size_pdfs_dst_2*ctr_1 + 19*ctr_2 + 18];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 9];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 10];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 11];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 12];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 13];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 14];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 15];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 16];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 17];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[19*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 19*_size_pdfs_dst_0*ctr_1 + 19*ctr_0 + 18];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_ALL {
+ namespace internal_d3q19storagespecification_localCopy_ALL {
static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
- double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
- double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
@@ -285,36 +136,36 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_ALL(double * RESTRIC
}
-namespace internal_d3q19storagespecification_pack_TE {
-static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+ namespace internal_d3q19storagespecification_pack_B {
+static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_SW {
-static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BW {
+static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
}
}
}
@@ -324,81 +175,65 @@ static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _da
namespace internal_d3q19storagespecification_pack_T {
static FUNC_PREFIX void d3q19storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_BS {
-static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_NW {
+static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_TN {
-static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_SW {
+static FUNC_PREFIX void d3q19storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_BW {
-static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BS {
+static FUNC_PREFIX void d3q19storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
}
}
}
@@ -408,809 +243,617 @@ static FUNC_PREFIX void d3q19storagespecification_pack_BW(double * RESTRICT _da
namespace internal_d3q19storagespecification_pack_N {
static FUNC_PREFIX void d3q19storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_E {
-static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BE {
+static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_NW {
-static FUNC_PREFIX void d3q19storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_W {
+static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_NE {
-static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TS {
+static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_TW {
-static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_E {
+static FUNC_PREFIX void d3q19storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_BE {
-static FUNC_PREFIX void d3q19storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_SE {
+static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_W {
-static FUNC_PREFIX void d3q19storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_BN {
+static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_S {
-static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TN {
+static FUNC_PREFIX void d3q19storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_SE {
-static FUNC_PREFIX void d3q19storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_NE {
+static FUNC_PREFIX void d3q19storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_TS {
-static FUNC_PREFIX void d3q19storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TW {
+static FUNC_PREFIX void d3q19storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_BN {
-static FUNC_PREFIX void d3q19storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_TE {
+static FUNC_PREFIX void d3q19storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_pack_B {
-static FUNC_PREFIX void d3q19storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_pack_S {
+static FUNC_PREFIX void d3q19storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[5*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 5*_size_pdfs_src_2*ctr_1 + 5*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[5*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 5*_size_pdfs_src_0*ctr_1 + 5*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_BW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+ namespace internal_d3q19storagespecification_unpack_N {
+static FUNC_PREFIX void d3q19storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_N {
-static FUNC_PREFIX void d3q19storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_SE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_NW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_TE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_S {
+static FUNC_PREFIX void d3q19storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_T {
-static FUNC_PREFIX void d3q19storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BS {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_TS {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_NE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_BE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_NW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BN {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_NE {
-static FUNC_PREFIX void d3q19storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TN {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_BS {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_W {
+static FUNC_PREFIX void d3q19storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_E {
-static FUNC_PREFIX void d3q19storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_SW {
+static FUNC_PREFIX void d3q19storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_S {
-static FUNC_PREFIX void d3q19storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_SE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_W {
-static FUNC_PREFIX void d3q19storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_E {
+static FUNC_PREFIX void d3q19storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_SW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TS {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_B {
-static FUNC_PREFIX void d3q19storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_BE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 1];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 3];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[5*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 5*_size_pdfs_dst_2*ctr_1 + 5*ctr_2 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_TN {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_B {
+static FUNC_PREFIX void d3q19storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_BN {
-static FUNC_PREFIX void d3q19storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_TE {
+static FUNC_PREFIX void d3q19storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q19storagespecification_unpack_TW {
-static FUNC_PREFIX void d3q19storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q19storagespecification_unpack_T {
+static FUNC_PREFIX void d3q19storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[5*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 5*_size_pdfs_dst_0*ctr_1 + 5*ctr_0 + 4];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_NE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+ namespace internal_d3q19storagespecification_localCopy_BE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_TS {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_NE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_BE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_BS {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_N {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_BW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_NW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_T {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BS {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
}
}
}
@@ -1220,337 +863,209 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT
namespace internal_d3q19storagespecification_localCopy_TN {
static FUNC_PREFIX void d3q19storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_W {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_SE {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_E {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_B {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_TW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_W {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_SW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TS {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_NW {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_TW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_BN {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_TE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_SW {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_B {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_BN {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_N {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_E {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_S {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_T {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q19storagespecification_localCopy_SE {
-static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q19storagespecification_localCopy_S {
+static FUNC_PREFIX void d3q19storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
}
}
}
@@ -1558,15 +1073,12 @@ static FUNC_PREFIX void d3q19storagespecification_localCopy_SE(double * RESTRICT
}
+
-
-/*************************************************************************************
+ /*************************************************************************************
* Kernel Wrappers
*************************************************************************************/
-namespace walberla {
-namespace lbm {
-
void D3Q19StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
{
double * buffer = reinterpret_cast(outBuffer);
diff --git a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
index 7c2fb9e8..7777f1d5 100644
--- a/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
+++ b/src/lbm_generated/storage_specification/D3Q19StorageSpecification.h
@@ -68,9 +68,35 @@ class D3Q19StorageSpecification
// If true the background deviation (rho_0 = 1) is subtracted for the collision step.
static const bool zeroCenteredPDFs = true;
// Lattice weights
- static constexpr double w[19] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
+ static constexpr double w[19] = { double(0.333333333333333), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0555555555555556), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778), double(0.0277777777777778) };
// Inverse lattice weights
- static constexpr double wInv[19] = { 3.00000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,18.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000,36.0000000000000 };
+ static constexpr double wInv[19] = { double(3.00000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(18.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000), double(36.0000000000000) };
+
+ struct AccessorEVEN
+ {
+ static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 };
+ static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 };
+ static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 };
+ static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+
+ static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+ };
+
+ struct AccessorODD
+ {
+ static constexpr cell_idx_t readX[19] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1 };
+ static constexpr cell_idx_t readY[19] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0 };
+ static constexpr cell_idx_t readZ[19] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1 };
+ static constexpr cell_idx_t readD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+
+ static constexpr cell_idx_t writeX[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeY[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeZ[19] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeD[19] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 };
+ };
// Compute kernels to pack and unpack MPI buffers
class PackKernels {
@@ -84,8 +110,8 @@ class D3Q19StorageSpecification
static const bool inplace = false;
/**
- * Packs all pdfs from the given cell interval to the send buffer.
- * */
+ * Packs all pdfs from the given cell interval to the send buffer.
+ * */
void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
/**
@@ -124,7 +150,7 @@ class D3Q19StorageSpecification
* @return The required size of the buffer, in bytes
* */
uint_t size (CellInterval & ci, stencil::Direction dir) const {
- return ci.numCells() * sizes[dir] * sizeof(value_type);
+ return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
}
/**
@@ -134,7 +160,7 @@ class D3Q19StorageSpecification
* @return The required size of the buffer, in bytes
* */
uint_t size (CellInterval & ci) const {
- return ci.numCells() * 19 * sizeof(value_type);
+ return ci.numCells() * 19 * uint_c(sizeof(value_type));
}
@@ -143,6 +169,8 @@ class D3Q19StorageSpecification
const uint_t sizes[27] { 0, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
};
+ using value_type = PackKernels::value_type;
+
};
}} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
index 3ecdf889..dce3446a 100644
--- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.cpp
@@ -27,345 +27,132 @@
# pragma GCC diagnostic ignored "-Wunused-variable"
#endif
-/*************************************************************************************
+namespace walberla {
+namespace lbm {
+
+ /*************************************************************************************
* Kernel Definitions
*************************************************************************************/
-namespace internal_d3q27storagespecification_pack_ALL {
+ namespace internal_d3q27storagespecification_pack_ALL {
static FUNC_PREFIX void d3q27storagespecification_pack_ALL(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
- {
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 1] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 3] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 4] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 5] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 6] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 7] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 8] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 9] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 10] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 11] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 12] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 13] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 14] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 15] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 16] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 17] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 18] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 19] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 20] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 21] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 22] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 23] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 24] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 25] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[27*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 27*_size_pdfs_src_2*ctr_1 + 27*ctr_2 + 26] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
- }
- }
- }
-}
-}
-
-namespace internal_d3q27storagespecification_unpack_ALL {
-static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ {
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 9] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 10] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 11] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 12] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 13] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 14] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 15] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 16] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 17] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 18] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 19] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 20] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 21] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 22] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 23] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 24] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 25] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_buffer[27*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 27*_size_pdfs_src_0*ctr_1 + 27*ctr_0 + 26] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
+ }
+ }
+ }
+}
+}
+
+ namespace internal_d3q27storagespecification_unpack_ALL {
+static FUNC_PREFIX void d3q27storagespecification_unpack_ALL(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2];
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 1];
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 2];
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 3];
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 4];
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 5];
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 6];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 7];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 8];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 9];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 10];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 11];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 12];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 13];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 14];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 15];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 16];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 17];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 18];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 19];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 20];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 21];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 22];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 23];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 24];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 25];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 27*_size_pdfs_dst_2*ctr_1 + 27*ctr_2 + 26];
- }
- }
- }
-}
-}
-
-namespace internal_d3q27storagespecification_localCopy_ALL {
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 9];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 10];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 11];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 12];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 13];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 14];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 15];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 16];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 17];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 18];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 19];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 20];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 21];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 22];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 23];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 24];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 25];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[27*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 27*_size_pdfs_dst_0*ctr_1 + 27*ctr_0 + 26];
+ }
+ }
+ }
+}
+}
+
+ namespace internal_d3q27storagespecification_localCopy_ALL {
static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_30 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0;
- double * RESTRICT _data_pdfs_src_00_30 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0;
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_30_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_30;
- double * RESTRICT _data_pdfs_src_00_30_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_30;
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_30_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_30_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
@@ -373,276 +160,200 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_ALL(double * RESTRIC
}
-namespace internal_d3q27storagespecification_pack_T {
-static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+ namespace internal_d3q27storagespecification_pack_BE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BN {
-static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_SE {
+static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_NE {
-static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BNE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BSW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_SE {
-static FUNC_PREFIX void d3q27storagespecification_pack_SE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_N {
+static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TNW {
-static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_SW {
+static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_W {
-static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_T {
+static FUNC_PREFIX void d3q27storagespecification_pack_T(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_B {
+static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_N {
-static FUNC_PREFIX void d3q27storagespecification_pack_N(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TNW {
+static FUNC_PREFIX void d3q27storagespecification_pack_TNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BSW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_W {
+static FUNC_PREFIX void d3q27storagespecification_pack_W(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
@@ -652,165 +363,123 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BSW(double * RESTRICT _d
namespace internal_d3q27storagespecification_pack_TSW {
static FUNC_PREFIX void d3q27storagespecification_pack_TSW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_E {
+static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_B {
-static FUNC_PREFIX void d3q27storagespecification_pack_B(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BSE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TNE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TN {
+static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TS {
-static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BNW {
+static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TN {
-static FUNC_PREFIX void d3q27storagespecification_pack_TN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TS {
+static FUNC_PREFIX void d3q27storagespecification_pack_TS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BNW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_NW {
+static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
@@ -820,105 +489,71 @@ static FUNC_PREFIX void d3q27storagespecification_pack_BNW(double * RESTRICT _d
namespace internal_d3q27storagespecification_pack_TW {
static FUNC_PREFIX void d3q27storagespecification_pack_TW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BSE {
-static FUNC_PREFIX void d3q27storagespecification_pack_BSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_S {
+static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 4] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 5] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 6] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 7] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_buffer[9*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 9*_size_pdfs_src_0*ctr_1 + 9*ctr_0 + 8] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_NW {
-static FUNC_PREFIX void d3q27storagespecification_pack_NW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TNE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_S {
-static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TSE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
}
}
}
@@ -928,1693 +563,1061 @@ static FUNC_PREFIX void d3q27storagespecification_pack_S(double * RESTRICT _dat
namespace internal_d3q27storagespecification_pack_BS {
static FUNC_PREFIX void d3q27storagespecification_pack_BS(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_TSE {
-static FUNC_PREFIX void d3q27storagespecification_pack_TSE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_TE {
+static FUNC_PREFIX void d3q27storagespecification_pack_TE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + _size_pdfs_src_2*ctr_1 + ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_SW {
-static FUNC_PREFIX void d3q27storagespecification_pack_SW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BN {
+static FUNC_PREFIX void d3q27storagespecification_pack_BN(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_BW {
-static FUNC_PREFIX void d3q27storagespecification_pack_BW(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_NE {
+static FUNC_PREFIX void d3q27storagespecification_pack_NE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 1] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[3*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 3*_size_pdfs_src_2*ctr_1 + 3*ctr_2 + 2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 1] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_buffer[3*_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + 3*_size_pdfs_src_0*ctr_1 + 3*ctr_0 + 2] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_pack_E {
-static FUNC_PREFIX void d3q27storagespecification_pack_E(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_pack_BNE {
+static FUNC_PREFIX void d3q27storagespecification_pack_BNE(double * RESTRICT _data_buffer, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_src_0, int64_t const _size_pdfs_src_1, int64_t const _size_pdfs_src_2, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_src_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_src_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_src_0; ctr_0 += 1)
{
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 1] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 3] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 4] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 5] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 6] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 7] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_buffer[9*_size_pdfs_src_1*_size_pdfs_src_2*ctr_0 + 9*_size_pdfs_src_2*ctr_1 + 9*ctr_2 + 8] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_buffer[_size_pdfs_src_0*_size_pdfs_src_1*ctr_2 + _size_pdfs_src_0*ctr_1 + ctr_0] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TSE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+ namespace internal_d3q27storagespecification_unpack_SE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_SE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_T {
-static FUNC_PREFIX void d3q27storagespecification_unpack_T(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_BS {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TN {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BNW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_SW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_SW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BSW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TNE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_W {
+static FUNC_PREFIX void d3q27storagespecification_unpack_W(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BN {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BN(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TN {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_W {
-static FUNC_PREFIX void d3q27storagespecification_unpack_W(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_E {
+static FUNC_PREFIX void d3q27storagespecification_unpack_E(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_E {
-static FUNC_PREFIX void d3q27storagespecification_unpack_E(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TS {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TS(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BNE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TNW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_NE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_NE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BSE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_NW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_NW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BSW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BSW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_SE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_SE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_S {
+static FUNC_PREFIX void d3q27storagespecification_unpack_S(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_N {
-static FUNC_PREFIX void d3q27storagespecification_unpack_N(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_B {
+static FUNC_PREFIX void d3q27storagespecification_unpack_B(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_NE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_NE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BN {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BN(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TSE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_B {
-static FUNC_PREFIX void d3q27storagespecification_unpack_B(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TNE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_NW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_NW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_S {
-static FUNC_PREFIX void d3q27storagespecification_unpack_S(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
-{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
+namespace internal_d3q27storagespecification_unpack_TNW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TNW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+{
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 1];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 3];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 4];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 5];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 6];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 7];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[9*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 9*_size_pdfs_dst_2*ctr_1 + 9*ctr_2 + 8];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TSW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BNE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BNE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BE {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BE(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BSE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BSE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BS {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_N {
+static FUNC_PREFIX void d3q27storagespecification_unpack_N(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_BE {
+static FUNC_PREFIX void d3q27storagespecification_unpack_BE(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TS {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TS(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_SW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_SW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[3*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 3*_size_pdfs_dst_0*ctr_1 + 3*ctr_0 + 2];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_BNW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_BNW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_T {
+static FUNC_PREFIX void d3q27storagespecification_unpack_T(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + _size_pdfs_dst_2*ctr_1 + ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 1];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 4];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 5];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 6];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 7];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_buffer[9*_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + 9*_size_pdfs_dst_0*ctr_1 + 9*ctr_0 + 8];
}
}
}
}
}
-namespace internal_d3q27storagespecification_unpack_TW {
-static FUNC_PREFIX void d3q27storagespecification_unpack_TW(const double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
+namespace internal_d3q27storagespecification_unpack_TSW {
+static FUNC_PREFIX void d3q27storagespecification_unpack_TSW(double * RESTRICT const _data_buffer, double * RESTRICT _data_pdfs_dst, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 1];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_buffer[3*_size_pdfs_dst_1*_size_pdfs_dst_2*ctr_0 + 3*_size_pdfs_dst_2*ctr_1 + 3*ctr_2 + 2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_buffer[_size_pdfs_dst_0*_size_pdfs_dst_1*ctr_2 + _size_pdfs_dst_0*ctr_1 + ctr_0];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_SE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+ namespace internal_d3q27storagespecification_localCopy_TNE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TS {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_S {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 2*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 2*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BNW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_E {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 4*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 4*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TSW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_SW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TNE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TS {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BS {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_W {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BNE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_33 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 3*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_33 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 3*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_33_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_33;
- double * RESTRICT _data_pdfs_src_00_33_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_33;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_33_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_33_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TSE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TSW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_NE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_SE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_SE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 10*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 10*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_B {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_W {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_W(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_36 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 6*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_36 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 6*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_36_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_36;
- double * RESTRICT _data_pdfs_src_00_36_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_36;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_36_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_36_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 3*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 3*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 9*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 9*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TNW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TSE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_NW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BN {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_B {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_B(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 6*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 6*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 17*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 17*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_N {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 17*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_317 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 17*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_317_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_317;
- double * RESTRICT _data_pdfs_src_00_317_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_317;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_317_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_317_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + _stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + _stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_SW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_SW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_NW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 7*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 7*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_T {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TNW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_35 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 5*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_35 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 5*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_313 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 13*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_313 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 13*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_35_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_35;
- double * RESTRICT _data_pdfs_src_00_35_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_35;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_313_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_313;
- double * RESTRICT _data_pdfs_src_00_313_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_313;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_35_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_35_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_313_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_313_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BSW {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_NE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_NE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 8*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 8*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_S {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_S(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_32 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 2*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_32 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 2*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_39 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 9*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_39 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 9*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_312 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 12*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_312 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 12*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_316 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 16*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_316 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 16*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_322 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 22*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_322 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 22*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_326 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 26*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_326 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 26*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_32_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_32;
- double * RESTRICT _data_pdfs_src_00_32_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_32;
- double * RESTRICT _data_pdfs_dst_00_39_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_39;
- double * RESTRICT _data_pdfs_src_00_39_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_39;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_312_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_312;
- double * RESTRICT _data_pdfs_src_00_312_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_312;
- double * RESTRICT _data_pdfs_dst_00_316_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_316;
- double * RESTRICT _data_pdfs_src_00_316_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_316;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_322_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_322;
- double * RESTRICT _data_pdfs_src_00_322_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_322;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- double * RESTRICT _data_pdfs_dst_00_326_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_326;
- double * RESTRICT _data_pdfs_src_00_326_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_326;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_32_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_32_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_39_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_39_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_312_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_312_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_316_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_316_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_322_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_322_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_326_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_326_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 15*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 15*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_TN {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_T {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_T(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 5*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 5*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 12*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 12*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 13*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 13*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 14*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 14*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 21*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 21*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 22*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 22*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_E {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_E(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BNW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BNW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_34 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 4*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_34 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 4*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_310 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 10*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_310 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 10*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_314 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 14*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_314 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 14*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_321 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 21*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_321 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 21*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_34_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_34;
- double * RESTRICT _data_pdfs_src_00_34_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_34;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_310_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_310;
- double * RESTRICT _data_pdfs_src_00_310_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_310;
- double * RESTRICT _data_pdfs_dst_00_314_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_314;
- double * RESTRICT _data_pdfs_src_00_314_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_314;
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_321_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_321;
- double * RESTRICT _data_pdfs_src_00_321_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_321;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_34_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_34_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_310_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_310_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_314_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_314_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_321_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_321_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 24*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 24*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_N {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_N(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BS {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BS(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
- {
- double * RESTRICT _data_pdfs_dst_00_31 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_31 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_37 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 7*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_37 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 7*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_38 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 8*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_38 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 8*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_311 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 11*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_311 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 11*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_315 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 15*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_315 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 15*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_319 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 19*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_319 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 19*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_320 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 20*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_320 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 20*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_324 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 24*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_324 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 24*_stride_pdfs_src_3;
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ {
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_31_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_31;
- double * RESTRICT _data_pdfs_src_00_31_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_31;
- double * RESTRICT _data_pdfs_dst_00_37_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_37;
- double * RESTRICT _data_pdfs_src_00_37_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_37;
- double * RESTRICT _data_pdfs_dst_00_38_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_38;
- double * RESTRICT _data_pdfs_src_00_38_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_38;
- double * RESTRICT _data_pdfs_dst_00_311_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_311;
- double * RESTRICT _data_pdfs_src_00_311_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_311;
- double * RESTRICT _data_pdfs_dst_00_315_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_315;
- double * RESTRICT _data_pdfs_src_00_315_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_315;
- double * RESTRICT _data_pdfs_dst_00_319_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_319;
- double * RESTRICT _data_pdfs_src_00_319_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_319;
- double * RESTRICT _data_pdfs_dst_00_320_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_320;
- double * RESTRICT _data_pdfs_src_00_320_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_320;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_324_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_324;
- double * RESTRICT _data_pdfs_src_00_324_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_324;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
- {
- _data_pdfs_dst_00_31_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_31_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_37_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_37_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_38_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_38_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_311_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_311_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_315_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_315_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_319_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_319_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_320_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_320_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_324_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_324_10[_stride_pdfs_src_2*ctr_2];
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ {
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 16*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 16*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BSE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BSE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BSW {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BSW(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 26*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 26*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_TN {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_TN(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 18*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_318 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 18*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
- double * RESTRICT _data_pdfs_dst_00_325 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 25*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_325 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 25*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_318_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_318;
- double * RESTRICT _data_pdfs_src_00_318_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_318;
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- double * RESTRICT _data_pdfs_dst_00_325_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_325;
- double * RESTRICT _data_pdfs_src_00_325_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_325;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_318_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_318_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
- _data_pdfs_dst_00_325_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_325_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 11*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 11*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 19*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 19*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 20*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 20*_stride_pdfs_src_3];
}
}
}
}
}
-namespace internal_d3q27storagespecification_localCopy_BNE {
-static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
+namespace internal_d3q27storagespecification_localCopy_BE {
+static FUNC_PREFIX void d3q27storagespecification_localCopy_BE(double * RESTRICT _data_pdfs_dst, double * RESTRICT const _data_pdfs_src, int64_t const _size_pdfs_dst_0, int64_t const _size_pdfs_dst_1, int64_t const _size_pdfs_dst_2, int64_t const _stride_pdfs_dst_0, int64_t const _stride_pdfs_dst_1, int64_t const _stride_pdfs_dst_2, int64_t const _stride_pdfs_dst_3, int64_t const _stride_pdfs_src_0, int64_t const _stride_pdfs_src_1, int64_t const _stride_pdfs_src_2, int64_t const _stride_pdfs_src_3)
{
- for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
+ for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_323 = _data_pdfs_dst + _stride_pdfs_dst_0*ctr_0 + 23*_stride_pdfs_dst_3;
- double * RESTRICT _data_pdfs_src_00_323 = _data_pdfs_src + _stride_pdfs_src_0*ctr_0 + 23*_stride_pdfs_src_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_dst_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_dst_00_323_10 = _stride_pdfs_dst_1*ctr_1 + _data_pdfs_dst_00_323;
- double * RESTRICT _data_pdfs_src_00_323_10 = _stride_pdfs_src_1*ctr_1 + _data_pdfs_src_00_323;
- for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_dst_2; ctr_2 += 1)
+ for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_dst_0; ctr_0 += 1)
{
- _data_pdfs_dst_00_323_10[_stride_pdfs_dst_2*ctr_2] = _data_pdfs_src_00_323_10[_stride_pdfs_src_2*ctr_2];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 18*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 18*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 23*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 23*_stride_pdfs_src_3];
+ _data_pdfs_dst[_stride_pdfs_dst_0*ctr_0 + _stride_pdfs_dst_1*ctr_1 + _stride_pdfs_dst_2*ctr_2 + 25*_stride_pdfs_dst_3] = _data_pdfs_src[_stride_pdfs_src_0*ctr_0 + _stride_pdfs_src_1*ctr_1 + _stride_pdfs_src_2*ctr_2 + 25*_stride_pdfs_src_3];
}
}
}
@@ -2622,15 +1625,12 @@ static FUNC_PREFIX void d3q27storagespecification_localCopy_BNE(double * RESTRIC
}
+
-
-/*************************************************************************************
+ /*************************************************************************************
* Kernel Wrappers
*************************************************************************************/
-namespace walberla {
-namespace lbm {
-
void D3Q27StorageSpecification::PackKernels::packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const
{
double * buffer = reinterpret_cast<double*>(outBuffer);
diff --git a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
index 42599878..c765ef8a 100644
--- a/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
+++ b/src/lbm_generated/storage_specification/D3Q27StorageSpecification.h
@@ -68,9 +68,35 @@ class D3Q27StorageSpecification
// If true the background deviation (rho_0 = 1) is subtracted for the collision step.
static const bool zeroCenteredPDFs = true;
// Lattice weights
- static constexpr double w[27] = { 0.296296296296296,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0740740740740741,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.0185185185185185,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963,0.00462962962962963 };
+ static constexpr double w[27] = { double(0.296296296296296), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0740740740740741), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.0185185185185185), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963), double(0.00462962962962963) };
// Inverse lattice weights
- static constexpr double wInv[27] = { 3.37500000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,13.5000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,54.0000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000,216.000000000000 };
+ static constexpr double wInv[27] = { double(3.37500000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(13.5000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(54.0000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000), double(216.000000000000) };
+
+ struct AccessorEVEN
+ {
+ static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 };
+ static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 };
+ static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 };
+ static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+
+ static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+ };
+
+ struct AccessorODD
+ {
+ static constexpr cell_idx_t readX[27] = { 0, 0, 0, 1, -1, 0, 0, 1, -1, 1, -1, 0, 0, 1, -1, 0, 0, 1, -1, -1, 1, -1, 1, -1, 1, -1, 1 };
+ static constexpr cell_idx_t readY[27] = { 0, -1, 1, 0, 0, 0, 0, -1, -1, 1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, -1, 1, 1, -1, -1, 1, 1 };
+ static constexpr cell_idx_t readZ[27] = { 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1 };
+ static constexpr cell_idx_t readD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+
+ static constexpr cell_idx_t writeX[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeY[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeZ[27] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ static constexpr cell_idx_t writeD[27] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 };
+ };
// Compute kernels to pack and unpack MPI buffers
class PackKernels {
@@ -84,8 +110,8 @@ class D3Q27StorageSpecification
static const bool inplace = false;
/**
- * Packs all pdfs from the given cell interval to the send buffer.
- * */
+ * Packs all pdfs from the given cell interval to the send buffer.
+ * */
void packAll(PdfField_T * pdfs_src, CellInterval & ci, unsigned char * outBuffer) const;
/**
@@ -124,7 +150,7 @@ class D3Q27StorageSpecification
* @return The required size of the buffer, in bytes
* */
uint_t size (CellInterval & ci, stencil::Direction dir) const {
- return ci.numCells() * sizes[dir] * sizeof(value_type);
+ return ci.numCells() * sizes[dir] * uint_c(sizeof(value_type));
}
/**
@@ -134,7 +160,7 @@ class D3Q27StorageSpecification
* @return The required size of the buffer, in bytes
* */
uint_t size (CellInterval & ci) const {
- return ci.numCells() * 27 * sizeof(value_type);
+ return ci.numCells() * 27 * uint_c(sizeof(value_type));
}
@@ -143,6 +169,8 @@ class D3Q27StorageSpecification
const uint_t sizes[27] { 0, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1 };
};
+ using value_type = PackKernels::value_type;
+
};
}} //lbm/walberla
\ No newline at end of file
diff --git a/src/lbm_generated/storage_specification/storage_specification_generation_script.py b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
index d7432ee7..42dcac5f 100644
--- a/src/lbm_generated/storage_specification/storage_specification_generation_script.py
+++ b/src/lbm_generated/storage_specification/storage_specification_generation_script.py
@@ -3,7 +3,7 @@
from pystencils import Target
from lbmpy.creationfunctions import create_lb_method
-from lbmpy import LBMConfig, Stencil, Method, LBStencil
+from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
from pystencils_walberla import ManualCodeGenerationContext, generate_info_header
from lbmpy_walberla.storage_specification import generate_lbm_storage_specification
@@ -22,11 +22,12 @@
lbm_config = LBMConfig(stencil=stencil, method=method, relaxation_rate=relaxation_rate,
streaming_pattern=streaming_pattern)
+ lbm_opt = LBMOptimisation()
lb_method = create_lb_method(lbm_config=lbm_config)
storage_spec_name = f'{stencil.name}StorageSpecification'
- generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config,
+ generate_lbm_storage_specification(ctx, storage_spec_name, lb_method, lbm_config, lbm_opt,
nonuniform=nonuniform, target=target, data_type=data_type)
ctx.write_all_files()
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
index b2ed0836..02cf3aaf 100644
--- a/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.cpp
@@ -41,119 +41,43 @@ static FUNC_PREFIX void d3q19srt_kernel_streamCollide(double * RESTRICT const _d
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
- const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
+ const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
}
}
}
@@ -166,100 +90,62 @@ static FUNC_PREFIX void d3q19srt_kernel_collide(double * RESTRICT _data_pdfs, i
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double xi_1 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
- const double xi_2 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
- const double xi_3 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double xi_4 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
- const double xi_5 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
- const double xi_6 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
- const double xi_7 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
- const double xi_8 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
- const double xi_9 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double xi_10 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
- const double xi_11 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
- const double xi_12 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double xi_13 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
- const double xi_14 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
- const double xi_15 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
- const double xi_16 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
- const double xi_17 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
- const double xi_18 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
- const double xi_19 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
- const double vel0Term = xi_15 + xi_17 + xi_2 + xi_8 + xi_9;
- const double vel1Term = xi_1 + xi_4 + xi_5 + xi_6;
- const double vel2Term = xi_11 + xi_13 + xi_19;
- const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_12 + xi_14 + xi_16 + xi_18 + xi_3 + xi_7;
- const double u_0 = vel0Term + xi_11*-1.0 + xi_12*-1.0 + xi_14*-1.0 + xi_16*-1.0 + xi_5*-1.0;
- const double u_1 = vel1Term + xi_12*-1.0 + xi_15*-1.0 + xi_18*-1.0 + xi_19*-1.0 + xi_7*-1.0 + xi_9;
- const double u_2 = vel2Term + xi_1*-1.0 + xi_10*-1.0 + xi_16*-1.0 + xi_17 + xi_18*-1.0 + xi_2*-1.0 + xi_6;
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3];
+ const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3];
+ const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3];
+ const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3];
+ const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3];
+ const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
+ const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3];
+ const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3];
+ const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3];
+ const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+ const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double vel0Term = xi_15 + xi_3 + xi_4 + xi_6 + xi_7;
+ const double vel1Term = xi_12 + xi_14 + xi_16 + xi_2;
+ const double vel2Term = xi_1 + xi_18 + xi_8;
+ const double delta_rho = vel0Term + vel1Term + vel2Term + xi_10 + xi_11 + xi_13 + xi_17 + xi_19 + xi_5 + xi_9;
+ const double u_0 = vel0Term - xi_1 - xi_17 - xi_19 - xi_2 - xi_9;
+ const double u_1 = vel1Term - xi_10 - xi_13 - xi_15 - xi_17 + xi_3 - xi_8;
+ const double u_2 = vel2Term - xi_11 - xi_13 + xi_14 - xi_16 - xi_4 + xi_6 - xi_9;
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
- const double f_eq_common = delta_rho - 1.0*(u_0*u_0) - 1.0*(u_1*u_1) - 1.0*(u_2*u_2);
- _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.33333333333333331 + xi_3*-1.0) + xi_3;
- _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 + xi_4*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_4;
- _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 + xi_7*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_7;
- _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 + xi_14*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_14;
- _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 + xi_8*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_8;
- _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 + xi_13*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_13;
- _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 + xi_10*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_10;
- _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 + xi_5*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_5;
- _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 + xi_9*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_9;
- _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 + xi_12*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_12;
- _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 + xi_15*-1.0 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15;
- _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 + xi_6*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_6;
- _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 + xi_19*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_19;
- _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 + xi_11*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_11;
- _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 + xi_17*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_17;
- _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 + xi_1*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_1;
- _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 + xi_18*-1.0 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_18;
- _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 + xi_16*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_16;
- _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 + xi_2*-1.0 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_2;
+ const double f_eq_common = delta_rho - u_0*u_0 - u_1*u_1 - u_2*u_2;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.33333333333333331 - xi_5) + xi_5;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*0.16666666666666666 - xi_12 + 0.33333333333333331*(u_1*u_1)) + xi_12;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_1*-0.16666666666666666 - xi_10 + 0.33333333333333331*(u_1*u_1)) + xi_10;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*-0.16666666666666666 - xi_19 + 0.33333333333333331*(u_0*u_0)) + xi_19;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_0*0.16666666666666666 - xi_7 + 0.33333333333333331*(u_0*u_0)) + xi_7;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*0.16666666666666666 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(delta_rho*-0.1111111111111111 + f_eq_common*0.16666666666666666 + u_2*-0.16666666666666666 - xi_11 + 0.33333333333333331*(u_2*u_2)) + xi_11;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*-0.083333333333333329 - xi_2 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_2;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*0.083333333333333329 - xi_3 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_3;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu1*-0.083333333333333329 - xi_17 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Pu1*u0Pu1)) + xi_17;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu1*0.083333333333333329 - xi_15 + 0.041666666666666664*(u_2*u_2) + 0.125*(u0Mu1*u0Mu1)) + xi_15;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*0.083333333333333329 - xi_14 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_14;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*-0.083333333333333329 - xi_8 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_8;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*-0.083333333333333329 - xi_1 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_1;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*0.083333333333333329 - xi_6 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_6;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Mu2*0.083333333333333329 - xi_16 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Mu2*u1Mu2)) + xi_16;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u1Pu2*-0.083333333333333329 - xi_13 + 0.041666666666666664*(u_0*u_0) + 0.125*(u1Pu2*u1Pu2)) + xi_13;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Pu2*-0.083333333333333329 - xi_9 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Pu2*u0Pu2)) + xi_9;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.041666666666666664 + u0Mu2*0.083333333333333329 - xi_4 + 0.041666666666666664*(u_1*u_1) + 0.125*(u0Mu2*u0Mu2)) + xi_4;
}
}
}
@@ -272,124 +158,48 @@ static FUNC_PREFIX void d3q19srt_kernel_stream(double * RESTRICT const _data_pdf
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+ const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
}
}
}
@@ -402,124 +212,48 @@ static FUNC_PREFIX void d3q19srt_kernel_streamOnlyNoAdvancement(double * RESTRIC
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
+ const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
}
}
}
@@ -532,80 +266,34 @@ static FUNC_PREFIX void d3q19srt_kernel_initialise(double * RESTRICT const _data
{
for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
{
- double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
- double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
- double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
- double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
{
- double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
- double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
- double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
- double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
{
- const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+ const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2];
const double delta_rho = rho - 1.0;
- const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
- const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
- const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
- _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2);
- _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
- _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
- _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
- _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
- _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
- _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
- _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
- _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
- _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
- _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
- _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
- _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+ const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2];
+ const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3];
+ const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3];
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.33333333333333331 - 0.33333333333333331*(u_0*u_0) - 0.33333333333333331*(u_1*u_1) - 0.33333333333333331*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_1*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*-0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_0*0.16666666666666666 - 0.16666666666666666*(u_1*u_1) - 0.16666666666666666*(u_2*u_2) + 0.16666666666666666*(u_0*u_0);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.055555555555555552 + u_2*-0.16666666666666666 - 0.16666666666666666*(u_0*u_0) - 0.16666666666666666*(u_1*u_1) + 0.16666666666666666*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*-0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*0.083333333333333329 + u_1*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*0.25 + u_0*-0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_1*-0.25 + u_0*0.083333333333333329 + u_1*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*-0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*0.083333333333333329 + u_2*0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*-0.25 + u_1*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_1*u_2*0.25 + u_1*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_1*u_1) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*0.25 + u_0*-0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.027777777777777776 + u_0*u_2*-0.25 + u_0*0.083333333333333329 + u_2*-0.083333333333333329 + 0.083333333333333329*(u_0*u_0) + 0.083333333333333329*(u_2*u_2);
}
}
}
@@ -618,71 +306,25 @@ static FUNC_PREFIX void d3q19srt_kernel_getter(double * RESTRICT _data_density,
{
for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
- double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
- double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
- double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
- double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
- double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
- double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
{
- const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
- const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
- const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
+ const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+ const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
const double rho = delta_rho + 1.0;
const double u_0 = momdensity_0;
const double u_1 = momdensity_1;
const double u_2 = momdensity_2;
- _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
- _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
- _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
- _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+ _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2;
}
}
}
diff --git a/src/lbm_generated/sweep_collection/D3Q19SRT.h b/src/lbm_generated/sweep_collection/D3Q19SRT.h
index 2fdb3850..56475b66 100644
--- a/src/lbm_generated/sweep_collection/D3Q19SRT.h
+++ b/src/lbm_generated/sweep_collection/D3Q19SRT.h
@@ -59,20 +59,20 @@ namespace lbm {
class D3Q19SRT
{
-public:
- enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+ enum Type { ALL = 0, INNER = 1, OUTER = 2 };
D3Q19SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
- : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+ : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
{
+ validInnerOuterSplit_= true;
+
for (auto& iBlock : *blocks)
{
- if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
- int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
- int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
- WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+ if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+ validInnerOuterSplit_ = false;
}
};
@@ -117,27 +117,33 @@ class D3Q19SRT
std::function streamCollide(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamCollideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamCollideOuter(block); };
- default:
- return [this](IBlock* block) { streamCollide(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamCollideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamCollideOuter(block); };
+ default:
+ return [this](IBlock* block) { streamCollide(block); };
}
}
std::function streamCollide(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamCollideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamCollideOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamCollideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamCollideOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
}
}
@@ -298,14 +304,14 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
}
-
+
- pdfs->swapDataPointers(pdfs_tmp);
+ pdfs->swapDataPointers(pdfs_tmp);
}
@@ -317,27 +323,33 @@ class D3Q19SRT
std::function collide(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { collideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { collideOuter(block); };
- default:
- return [this](IBlock* block) { collide(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { collideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { collideOuter(block); };
+ default:
+ return [this](IBlock* block) { collide(block); };
}
}
std::function collide(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { collideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { collideOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { collideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { collideOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
}
}
@@ -425,14 +437,14 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
collideCellInterval(pdfs, omega, ci);
}
-
+
-
+
}
@@ -443,27 +455,33 @@ class D3Q19SRT
std::function stream(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOuter(block); };
- default:
- return [this](IBlock* block) { stream(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOuter(block); };
+ default:
+ return [this](IBlock* block) { stream(block); };
}
}
std::function stream(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
}
}
@@ -624,14 +642,14 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamCellInterval(pdfs, pdfs_tmp, ci);
}
-
+
- pdfs->swapDataPointers(pdfs_tmp);
+ pdfs->swapDataPointers(pdfs_tmp);
}
@@ -643,27 +661,33 @@ class D3Q19SRT
std::function streamOnlyNoAdvancement(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
- default:
- return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+ default:
+ return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
}
}
std::function streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
}
}
@@ -821,14 +845,14 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
}
-
+
-
+
}
@@ -839,27 +863,33 @@ class D3Q19SRT
std::function initialise(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { initialiseInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { initialiseOuter(block); };
- default:
- return [this](IBlock* block) { initialise(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { initialiseInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { initialiseOuter(block); };
+ default:
+ return [this](IBlock* block) { initialise(block); };
}
}
std::function initialise(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { initialiseInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { initialiseOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { initialiseInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { initialiseOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
}
}
@@ -870,9 +900,9 @@ class D3Q19SRT
const cell_idx_t ghost_layers = 0;
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -884,9 +914,9 @@ class D3Q19SRT
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -898,9 +928,9 @@ class D3Q19SRT
void initialiseCellInterval(IBlock * block, const CellInterval & ci)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -910,9 +940,9 @@ class D3Q19SRT
void initialiseInner(IBlock * block)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -926,9 +956,9 @@ class D3Q19SRT
void initialiseOuter(IBlock * block)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -957,14 +987,14 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
initialiseCellInterval(density, pdfs, velocity, ci);
}
-
+
-
+
}
@@ -975,27 +1005,33 @@ class D3Q19SRT
std::function calculateMacroscopicParameters(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
- default:
- return [this](IBlock* block) { calculateMacroscopicParameters(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+ default:
+ return [this](IBlock* block) { calculateMacroscopicParameters(block); };
}
}
std::function calculateMacroscopicParameters(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { calculateMacroscopicParametersInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { calculateMacroscopicParametersOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { calculateMacroscopicParameters(block, ghost_layers); };
}
}
@@ -1006,9 +1042,9 @@ class D3Q19SRT
const cell_idx_t ghost_layers = 0;
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -1020,9 +1056,9 @@ class D3Q19SRT
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -1034,9 +1070,9 @@ class D3Q19SRT
void calculateMacroscopicParametersCellInterval(IBlock * block, const CellInterval & ci)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -1046,9 +1082,9 @@ class D3Q19SRT
void calculateMacroscopicParametersInner(IBlock * block)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -1062,9 +1098,9 @@ class D3Q19SRT
void calculateMacroscopicParametersOuter(IBlock * block)
{
- auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
- auto velocity = block->getData< field::GhostLayerField >(velocityID);
auto density = block->getData< field::GhostLayerField >(densityID);
+ auto velocity = block->getData< field::GhostLayerField >(velocityID);
+ auto pdfs = block->getData< field::GhostLayerField >(pdfsID);
@@ -1093,32 +1129,33 @@ class D3Q19SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
calculateMacroscopicParametersCellInterval(density, pdfs, velocity, ci);
}
-
+
-
+
}
- private:
- shared_ptr< StructuredBlockStorage > blocks_;
- BlockDataID pdfsID;
+ private:
+ shared_ptr< StructuredBlockStorage > blocks_;
+ BlockDataID pdfsID;
BlockDataID densityID;
BlockDataID velocityID;
double omega_;
private: std::set< field::GhostLayerField *, field::SwapableCompare< field::GhostLayerField * > > cache_pdfs_;
- Cell outerWidth_;
- std::vector layers_;
+ Cell outerWidth_;
+ std::vector layers_;
+ bool validInnerOuterSplit_;
-
+
};
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
index ce89749f..24a8f232 100644
--- a/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.cpp
@@ -41,159 +41,51 @@ static FUNC_PREFIX void d3q27srt_kernel_streamCollide(double * RESTRICT const _d
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
- double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
- double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
- double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
- double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
- double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
- double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
- double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
- double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
- double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
- double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double vel0Term = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double u_0 = vel0Term - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double u_1 = vel1Term - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double u_2 = vel2Term - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+ const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double u_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+ const double u_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+ const double u_2 = vel2Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 
+ 19*_stride_pdfs_3];
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 - 1.0*_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - 1.0*_data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - 1.0*_data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_1*u_1)) + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - 1.0*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.33333333333333331*(u_0*u_0)) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - 1.0*_data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - 1.0*_data_pdfs_21_36_10[_stride_pdfs_0*ctr_0] + 0.33333333333333331*(u_2*u_2)) + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - 1.0*_data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - 1.0*_data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - 1.0*_data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu1*u0Pu1)) + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - 1.0*_data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu1*u0Mu1)) + _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - 1.0*_data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - 1.0*_data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Mu2*u1Mu2)) + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + 0.083333333333333329*(u1Pu2*u1Pu2)) + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - 1.0*_data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.083333333333333329*(u0Pu2*u0Pu2)) + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - 1.0*_data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.083333333333333329*(u0Mu2*u0Mu2)) + _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - 1.0*_data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - 1.0*_data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + 0.33333333333333331*(u_1*u_1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + 0.33333333333333331*(u_0*u_0) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + 0.33333333333333331*(u_2*u_2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + 0.083333333333333329*(u0Pu1*u0Pu1) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + 0.083333333333333329*(u0Mu1*u0Mu1) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + 0.083333333333333329*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + 0.083333333333333329*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + 0.083333333333333329*(u0Pu2*u0Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + 0.083333333333333329*(u0Mu2*u0Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2) - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3]) + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
}
}
}
@@ -206,132 +98,78 @@ static FUNC_PREFIX void d3q27srt_kernel_collide(double * RESTRICT _data_pdfs, i
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
- double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
- double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double xi_1 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
- const double xi_2 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
- const double xi_3 = _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0];
- const double xi_4 = _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0];
- const double xi_5 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
- const double xi_6 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
- const double xi_7 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
- const double xi_8 = _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
- const double xi_9 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
- const double xi_10 = _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0];
- const double xi_11 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
- const double xi_12 = _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0];
- const double xi_13 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
- const double xi_14 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double xi_15 = _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0];
- const double xi_16 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
- const double xi_17 = _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0];
- const double xi_18 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double xi_19 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
- const double xi_20 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double xi_21 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
- const double xi_22 = _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0];
- const double xi_23 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
- const double xi_24 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
- const double xi_25 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
- const double xi_26 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
- const double xi_27 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
- const double vel0Term = xi_12 + xi_14 + xi_15 + xi_24 + xi_25 + xi_26 + xi_4 + xi_5 + xi_8;
- const double vel1Term = xi_10 + xi_11 + xi_13 + xi_17 + xi_21 + xi_9;
- const double vel2Term = xi_1 + xi_19 + xi_22 + xi_7;
- const double delta_rho = vel0Term + vel1Term + vel2Term + xi_16 + xi_18 + xi_2 + xi_20 + xi_23 + xi_27 + xi_3 + xi_6;
- const double u_0 = vel0Term + xi_1*-1.0 + xi_10*-1.0 + xi_11*-1.0 + xi_17*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_23*-1.0 + xi_27*-1.0 + xi_3*-1.0;
- const double u_1 = vel1Term + xi_12 + xi_14 + xi_15*-1.0 + xi_16*-1.0 + xi_18*-1.0 + xi_22*-1.0 + xi_3*-1.0 + xi_4 + xi_5*-1.0 + xi_6*-1.0 + xi_7*-1.0 + xi_8*-1.0;
- const double u_2 = vel2Term + xi_10*-1.0 + xi_12 + xi_15*-1.0 + xi_17 + xi_2*-1.0 + xi_21*-1.0 + xi_23*-1.0 + xi_24 + xi_25*-1.0 + xi_3*-1.0 + xi_4*-1.0 + xi_6*-1.0 + xi_8 + xi_9;
- const double u0Mu1 = u_0 + u_1*-1.0;
+ const double xi_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3];
+ const double xi_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3];
+ const double xi_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double xi_4 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double xi_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3];
+ const double xi_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+ const double xi_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3];
+ const double xi_8 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3];
+ const double xi_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3];
+ const double xi_10 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3];
+ const double xi_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3];
+ const double xi_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3];
+ const double xi_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3];
+ const double xi_14 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double xi_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3];
+ const double xi_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3];
+ const double xi_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3];
+ const double xi_18 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
+ const double xi_19 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double xi_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double xi_21 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double xi_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3];
+ const double xi_23 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3];
+ const double xi_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3];
+ const double xi_25 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double xi_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double xi_27 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double vel0Term = xi_1 + xi_10 + xi_13 + xi_14 + xi_16 + xi_17 + xi_19 + xi_24 + xi_25;
+ const double vel1Term = xi_12 + xi_22 + xi_23 + xi_27 + xi_4 + xi_9;
+ const double vel2Term = xi_15 + xi_5 + xi_6 + xi_7;
+ const double delta_rho = vel0Term + vel1Term + vel2Term + xi_11 + xi_18 + xi_2 + xi_20 + xi_21 + xi_26 + xi_3 + xi_8;
+ const double u_0 = vel0Term - xi_11 - xi_12 - xi_15 - xi_20 - xi_23 - xi_26 - xi_27 - xi_5 - xi_8;
+ const double u_1 = vel1Term + xi_1 + xi_10 - xi_15 - xi_16 + xi_19 - xi_2 - xi_20 - xi_21 - xi_24 - xi_25 - xi_7 - xi_8;
+ const double u_2 = vel2Term - xi_1 + xi_10 - xi_11 + xi_12 - xi_13 - xi_16 + xi_17 - xi_18 - xi_2 - xi_22 - xi_23 + xi_24 - xi_8 + xi_9;
+ const double u0Mu1 = u_0 - u_1;
const double u0Pu1 = u_0 + u_1;
const double u1Pu2 = u_1 + u_2;
- const double u1Mu2 = u_1 + u_2*-1.0;
- const double u0Mu2 = u_0 + u_2*-1.0;
+ const double u1Mu2 = u_1 - u_2;
+ const double u0Mu2 = u_0 - u_2;
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = delta_rho - 1.5*(u_0*u_0) - 1.5*(u_1*u_1) - 1.5*(u_2*u_2);
- _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.29629629629629628 + xi_20*-1.0) + xi_20;
- _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 + xi_13*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_13;
- _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 + xi_16*-1.0 + 0.33333333333333331*(u_1*u_1)) + xi_16;
- _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 + xi_27*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_27;
- _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 + xi_26*-1.0 + 0.33333333333333331*(u_0*u_0)) + xi_26;
- _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 + xi_19*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_19;
- _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 + xi_2*-1.0 + 0.33333333333333331*(u_2*u_2)) + xi_2;
- _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 + xi_11*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_11;
- _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 + xi_14*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_14;
- _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 + xi_18*-1.0 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_18;
- _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 + xi_5*-1.0 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_5;
- _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 + xi_9*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9;
- _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 + xi_7*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7;
- _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 + xi_1*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_1;
- _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 + xi_24*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_24;
- _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 + xi_21*-1.0 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_21;
- _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 + xi_6*-1.0 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_6;
- _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 + xi_23*-1.0 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_23;
- _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 + xi_25*-1.0 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_25;
- _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_12*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12;
- _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_17*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_17;
- _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 + xi_8*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_8;
- _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 + xi_22*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_22;
- _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_4*-1.0 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_4;
- _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_10*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_10;
- _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 + xi_15*-1.0 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_15;
- _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 + xi_3*-1.0 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_3;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = omega*(f_eq_common*0.29629629629629628 - xi_3) + xi_3;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*0.22222222222222221 - xi_4 + 0.33333333333333331*(u_1*u_1)) + xi_4;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_1*-0.22222222222222221 - xi_21 + 0.33333333333333331*(u_1*u_1)) + xi_21;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*-0.22222222222222221 - xi_26 + 0.33333333333333331*(u_0*u_0)) + xi_26;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_0*0.22222222222222221 - xi_14 + 0.33333333333333331*(u_0*u_0)) + xi_14;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*0.22222222222222221 - xi_6 + 0.33333333333333331*(u_2*u_2)) + xi_6;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] = omega*(f_eq_common*0.07407407407407407 + u_2*-0.22222222222222221 - xi_18 + 0.33333333333333331*(u_2*u_2)) + xi_18;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*-0.055555555555555552 - xi_27 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_27;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*0.055555555555555552 - xi_19 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_19;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu1*-0.055555555555555552 - xi_20 + 0.083333333333333329*(u0Pu1*u0Pu1)) + xi_20;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu1*0.055555555555555552 - xi_25 + 0.083333333333333329*(u0Mu1*u0Mu1)) + xi_25;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*0.055555555555555552 - xi_9 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_9;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*-0.055555555555555552 - xi_7 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_7;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*-0.055555555555555552 - xi_5 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_5;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*0.055555555555555552 - xi_17 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_17;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Mu2*0.055555555555555552 - xi_22 + 0.083333333333333329*(u1Mu2*u1Mu2)) + xi_22;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u1Pu2*-0.055555555555555552 - xi_2 + 0.083333333333333329*(u1Pu2*u1Pu2)) + xi_2;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Pu2*-0.055555555555555552 - xi_11 + 0.083333333333333329*(u0Pu2*u0Pu2)) + xi_11;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] = omega*(f_eq_common*0.018518518518518517 + u0Mu2*0.055555555555555552 - xi_13 + 0.083333333333333329*(u0Mu2*u0Mu2)) + xi_13;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_10 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_10;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_12 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_12;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*0.013888888888888888 - xi_24 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_24;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*0.013888888888888888 - xi_15 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_15;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_1 + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_1;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_23 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Mu2*u1Mu2)) + xi_23;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Mu1*0.013888888888888888 + u_2*-0.013888888888888888 - xi_16 + 0.020833333333333332*(u0Mu1*u0Mu1) + 0.020833333333333332*(u0Mu2*u0Mu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_16;
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] = omega*(delta_rho*-0.013888888888888888 + f_eq_common*0.018518518518518517 + u0Pu1*-0.013888888888888888 + u_2*-0.013888888888888888 - xi_8 + 0.020833333333333332*(u0Pu1*u0Pu1) + 0.020833333333333332*(u0Pu2*u0Pu2) + 0.020833333333333332*(u1Pu2*u1Pu2)) + xi_8;
}
}
}
@@ -344,172 +182,64 @@ static FUNC_PREFIX void d3q27srt_kernel_stream(double * RESTRICT const _data_pdf
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
- double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
- double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
- double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
- double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
- double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
- double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
- double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
- double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
- double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
- double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
- double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
- double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
- double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
- double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
- double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
- _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
- _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
- _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
- _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
- _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
- _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
- _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
- _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+ const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+ const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+ const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+ const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+ const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+ const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+ const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+ const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+ const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26;
}
}
}
@@ -522,172 +252,64 @@ static FUNC_PREFIX void d3q27srt_kernel_streamOnlyNoAdvancement(double * RESTRIC
{
for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_319 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_320 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_321 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_2m1_322 = _data_pdfs + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_21_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2;
- double * RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_319 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_320 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_321 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_322 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_323 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_324 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_325 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3;
- double * RESTRICT _data_pdfs_tmp_20_326 = _data_pdfs_tmp + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_35;
- double * RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_36;
- double * RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
- double * RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
- double * RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_313;
- double * RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_2m1_314;
- double * RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
- double * RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
- double * RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_317;
- double * RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_21_318;
- double * RESTRICT _data_pdfs_2m1_319_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_319;
- double * RESTRICT _data_pdfs_2m1_320_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_320;
- double * RESTRICT _data_pdfs_2m1_321_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_321;
- double * RESTRICT _data_pdfs_2m1_322_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_322;
- double * RESTRICT _data_pdfs_21_323_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_323;
- double * RESTRICT _data_pdfs_21_324_1m1 = _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _data_pdfs_21_324;
- double * RESTRICT _data_pdfs_21_325_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_325;
- double * RESTRICT _data_pdfs_21_326_11 = _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _data_pdfs_21_326;
- double * RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_30;
- double * RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_31;
- double * RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_32;
- double * RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_33;
- double * RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_34;
- double * RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_35;
- double * RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_36;
- double * RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_37;
- double * RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_38;
- double * RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_39;
- double * RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_310;
- double * RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_311;
- double * RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_312;
- double * RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_313;
- double * RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_314;
- double * RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_315;
- double * RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_316;
- double * RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_317;
- double * RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_318;
- double * RESTRICT _data_pdfs_tmp_20_319_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_319;
- double * RESTRICT _data_pdfs_tmp_20_320_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_320;
- double * RESTRICT _data_pdfs_tmp_20_321_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_321;
- double * RESTRICT _data_pdfs_tmp_20_322_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_322;
- double * RESTRICT _data_pdfs_tmp_20_323_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_323;
- double * RESTRICT _data_pdfs_tmp_20_324_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_324;
- double * RESTRICT _data_pdfs_tmp_20_325_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_325;
- double * RESTRICT _data_pdfs_tmp_20_326_10 = _stride_pdfs_tmp_1*ctr_1 + _data_pdfs_tmp_20_326;
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1)
{
- const double streamed_0 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
- const double streamed_1 = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_2 = _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0];
- const double streamed_3 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_4 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_5 = _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
- const double streamed_6 = _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
- const double streamed_7 = _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_8 = _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_9 = _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_10 = _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_11 = _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_12 = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
- const double streamed_13 = _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_14 = _data_pdfs_2m1_314_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_15 = _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0];
- const double streamed_16 = _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0];
- const double streamed_17 = _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_18 = _data_pdfs_21_318_10[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_19 = _data_pdfs_2m1_319_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_20 = _data_pdfs_2m1_320_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_21 = _data_pdfs_2m1_321_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_22 = _data_pdfs_2m1_322_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_23 = _data_pdfs_21_323_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_24 = _data_pdfs_21_324_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- const double streamed_25 = _data_pdfs_21_325_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
- const double streamed_26 = _data_pdfs_21_326_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
- _data_pdfs_tmp_20_30_10[_stride_pdfs_tmp_0*ctr_0] = streamed_0;
- _data_pdfs_tmp_20_31_10[_stride_pdfs_tmp_0*ctr_0] = streamed_1;
- _data_pdfs_tmp_20_32_10[_stride_pdfs_tmp_0*ctr_0] = streamed_2;
- _data_pdfs_tmp_20_33_10[_stride_pdfs_tmp_0*ctr_0] = streamed_3;
- _data_pdfs_tmp_20_34_10[_stride_pdfs_tmp_0*ctr_0] = streamed_4;
- _data_pdfs_tmp_20_35_10[_stride_pdfs_tmp_0*ctr_0] = streamed_5;
- _data_pdfs_tmp_20_36_10[_stride_pdfs_tmp_0*ctr_0] = streamed_6;
- _data_pdfs_tmp_20_37_10[_stride_pdfs_tmp_0*ctr_0] = streamed_7;
- _data_pdfs_tmp_20_38_10[_stride_pdfs_tmp_0*ctr_0] = streamed_8;
- _data_pdfs_tmp_20_39_10[_stride_pdfs_tmp_0*ctr_0] = streamed_9;
- _data_pdfs_tmp_20_310_10[_stride_pdfs_tmp_0*ctr_0] = streamed_10;
- _data_pdfs_tmp_20_311_10[_stride_pdfs_tmp_0*ctr_0] = streamed_11;
- _data_pdfs_tmp_20_312_10[_stride_pdfs_tmp_0*ctr_0] = streamed_12;
- _data_pdfs_tmp_20_313_10[_stride_pdfs_tmp_0*ctr_0] = streamed_13;
- _data_pdfs_tmp_20_314_10[_stride_pdfs_tmp_0*ctr_0] = streamed_14;
- _data_pdfs_tmp_20_315_10[_stride_pdfs_tmp_0*ctr_0] = streamed_15;
- _data_pdfs_tmp_20_316_10[_stride_pdfs_tmp_0*ctr_0] = streamed_16;
- _data_pdfs_tmp_20_317_10[_stride_pdfs_tmp_0*ctr_0] = streamed_17;
- _data_pdfs_tmp_20_318_10[_stride_pdfs_tmp_0*ctr_0] = streamed_18;
- _data_pdfs_tmp_20_319_10[_stride_pdfs_tmp_0*ctr_0] = streamed_19;
- _data_pdfs_tmp_20_320_10[_stride_pdfs_tmp_0*ctr_0] = streamed_20;
- _data_pdfs_tmp_20_321_10[_stride_pdfs_tmp_0*ctr_0] = streamed_21;
- _data_pdfs_tmp_20_322_10[_stride_pdfs_tmp_0*ctr_0] = streamed_22;
- _data_pdfs_tmp_20_323_10[_stride_pdfs_tmp_0*ctr_0] = streamed_23;
- _data_pdfs_tmp_20_324_10[_stride_pdfs_tmp_0*ctr_0] = streamed_24;
- _data_pdfs_tmp_20_325_10[_stride_pdfs_tmp_0*ctr_0] = streamed_25;
- _data_pdfs_tmp_20_326_10[_stride_pdfs_tmp_0*ctr_0] = streamed_26;
+ const double streamed_0 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double streamed_1 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double streamed_2 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3];
+ const double streamed_3 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3];
+ const double streamed_4 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3];
+ const double streamed_5 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3];
+ const double streamed_6 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3];
+ const double streamed_7 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3];
+ const double streamed_8 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double streamed_9 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double streamed_10 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3];
+ const double streamed_11 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3];
+ const double streamed_12 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3];
+ const double streamed_13 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3];
+ const double streamed_14 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3];
+ const double streamed_15 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3];
+ const double streamed_16 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3];
+ const double streamed_17 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3];
+ const double streamed_18 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3];
+ const double streamed_19 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3];
+ const double streamed_20 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3];
+ const double streamed_21 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3];
+ const double streamed_22 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3];
+ const double streamed_23 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3];
+ const double streamed_24 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3];
+ const double streamed_25 = _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3];
+ const double streamed_26 = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3];
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2] = streamed_0;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + _stride_pdfs_tmp_3] = streamed_1;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 2*_stride_pdfs_tmp_3] = streamed_2;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 3*_stride_pdfs_tmp_3] = streamed_3;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 4*_stride_pdfs_tmp_3] = streamed_4;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 5*_stride_pdfs_tmp_3] = streamed_5;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 6*_stride_pdfs_tmp_3] = streamed_6;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 7*_stride_pdfs_tmp_3] = streamed_7;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 8*_stride_pdfs_tmp_3] = streamed_8;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 9*_stride_pdfs_tmp_3] = streamed_9;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 10*_stride_pdfs_tmp_3] = streamed_10;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 11*_stride_pdfs_tmp_3] = streamed_11;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 12*_stride_pdfs_tmp_3] = streamed_12;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 13*_stride_pdfs_tmp_3] = streamed_13;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 14*_stride_pdfs_tmp_3] = streamed_14;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 15*_stride_pdfs_tmp_3] = streamed_15;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 16*_stride_pdfs_tmp_3] = streamed_16;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 17*_stride_pdfs_tmp_3] = streamed_17;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 18*_stride_pdfs_tmp_3] = streamed_18;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 19*_stride_pdfs_tmp_3] = streamed_19;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 20*_stride_pdfs_tmp_3] = streamed_20;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 21*_stride_pdfs_tmp_3] = streamed_21;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 22*_stride_pdfs_tmp_3] = streamed_22;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 23*_stride_pdfs_tmp_3] = streamed_23;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 24*_stride_pdfs_tmp_3] = streamed_24;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 25*_stride_pdfs_tmp_3] = streamed_25;
+ _data_pdfs_tmp[_stride_pdfs_tmp_0*ctr_0 + _stride_pdfs_tmp_1*ctr_1 + _stride_pdfs_tmp_2*ctr_2 + 26*_stride_pdfs_tmp_3] = streamed_26;
}
}
}
@@ -700,104 +322,42 @@ static FUNC_PREFIX void d3q27srt_kernel_initialise(double * RESTRICT const _data
{
for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
{
- double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
- double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
- double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
- double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
{
- double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
- double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
- double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
- double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
- double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
- double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
- double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
- double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
- double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
- double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
- double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
- double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
{
- const double rho = _data_density_20_30_10[_stride_density_0*ctr_0];
+ const double rho = _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2];
const double delta_rho = rho - 1.0;
- const double u_0 = _data_velocity_20_30_10[_stride_velocity_0*ctr_0];
- const double u_1 = _data_velocity_20_31_10[_stride_velocity_0*ctr_0];
- const double u_2 = _data_velocity_20_32_10[_stride_velocity_0*ctr_0];
- _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2);
- _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
- _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
- _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
- _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
- _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
- _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
- _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
- _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
- _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
- _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
- _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
- _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
- _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ const double u_0 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2];
+ const double u_1 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3];
+ const double u_2 = _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3];
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2] = delta_rho*0.29629629629629628 - 0.44444444444444442*(u_0*u_0) - 0.44444444444444442*(u_1*u_1) - 0.44444444444444442*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_1*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*-0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_0*0.22222222222222221 - 0.1111111111111111*(u_1*u_1) - 0.1111111111111111*(u_2*u_2) + 0.22222222222222221*(u_0*u_0);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 5*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3] = delta_rho*0.07407407407407407 + u_2*-0.22222222222222221 - 0.1111111111111111*(u_0*u_0) - 0.1111111111111111*(u_1*u_1) + 0.22222222222222221*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*-0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*0.055555555555555552 + u_1*0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*0.16666666666666666 + u_0*-0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_1*-0.16666666666666666 + u_0*0.055555555555555552 + u_1*-0.055555555555555552 - 0.027777777777777776*(u_2*u_2) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_1*u_1);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 11*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 12*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 13*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*-0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 14*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*0.055555555555555552 + u_2*0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 15*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*-0.16666666666666666 + u_1*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 16*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_1*u_2*0.16666666666666666 + u_1*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_0*u_0) + 0.055555555555555552*(u_1*u_1) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 17*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*0.16666666666666666 + u_0*-0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 18*_stride_pdfs_3] = delta_rho*0.018518518518518517 + u_0*u_2*-0.16666666666666666 + u_0*0.055555555555555552 + u_2*-0.055555555555555552 - 0.027777777777777776*(u_1*u_1) + 0.055555555555555552*(u_0*u_0) + 0.055555555555555552*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 19*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 20*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 21*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 - _stride_pdfs_2 + 22*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*-0.013888888888888888 + u_2*0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 23*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 - _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 24*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*-0.041666666666666664 + u_1*0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 - _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 25*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*-0.041666666666666664 + u_0*u_2*-0.041666666666666664 + u_0*0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
+ _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 26*_stride_pdfs_3] = delta_rho*0.0046296296296296294 + u_0*u_1*0.041666666666666664 + u_0*u_2*0.041666666666666664 + u_0*-0.013888888888888888 + u_1*u_2*0.041666666666666664 + u_1*-0.013888888888888888 + u_2*-0.013888888888888888 + 0.013888888888888888*(u_0*u_0) + 0.013888888888888888*(u_1*u_1) + 0.013888888888888888*(u_2*u_2);
}
}
}
@@ -810,87 +370,25 @@ static FUNC_PREFIX void d3q27srt_kernel_getter(double * RESTRICT _data_density,
{
for (int64_t ctr_2 = 0; ctr_2 < _size_density_2; ctr_2 += 1)
{
- double * RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_319 = _data_pdfs + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_321 = _data_pdfs + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_323 = _data_pdfs + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_325 = _data_pdfs + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_320 = _data_pdfs + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_322 = _data_pdfs + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_324 = _data_pdfs + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_326 = _data_pdfs + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
- double * RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
- double * RESTRICT _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
- double * RESTRICT _data_density_20_30 = _data_density + _stride_density_2*ctr_2;
- double * RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2*ctr_2;
- double * RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2*ctr_2 + _stride_velocity_3;
- double * RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3;
for (int64_t ctr_1 = 0; ctr_1 < _size_density_1; ctr_1 += 1)
{
- double * RESTRICT _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
- double * RESTRICT _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
- double * RESTRICT _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
- double * RESTRICT _data_pdfs_20_319_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_319;
- double * RESTRICT _data_pdfs_20_321_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_321;
- double * RESTRICT _data_pdfs_20_323_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_323;
- double * RESTRICT _data_pdfs_20_325_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_325;
- double * RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
- double * RESTRICT _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
- double * RESTRICT _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
- double * RESTRICT _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
- double * RESTRICT _data_pdfs_20_320_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_320;
- double * RESTRICT _data_pdfs_20_322_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_322;
- double * RESTRICT _data_pdfs_20_324_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_324;
- double * RESTRICT _data_pdfs_20_326_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_326;
- double * RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
- double * RESTRICT _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
- double * RESTRICT _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
- double * RESTRICT _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
- double * RESTRICT _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
- double * RESTRICT _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
- double * RESTRICT _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
- double * RESTRICT _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
- double * RESTRICT _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
- double * RESTRICT _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
- double * RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
- double * RESTRICT _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
- double * RESTRICT _data_density_20_30_10 = _stride_density_1*ctr_1 + _data_density_20_30;
- double * RESTRICT _data_velocity_20_30_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_30;
- double * RESTRICT _data_velocity_20_31_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_31;
- double * RESTRICT _data_velocity_20_32_10 = _stride_velocity_1*ctr_1 + _data_velocity_20_32;
for (int64_t ctr_0 = 0; ctr_0 < _size_density_0; ctr_0 += 1)
{
- const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double momdensity_0 = vel0Term - 1.0*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
- const double momdensity_1 = vel1Term - 1.0*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_321_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
- const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_322_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
- const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
- const double momdensity_2 = vel2Term - 1.0*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_323_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_324_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_325_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_326_10[_stride_pdfs_0*ctr_0] - 1.0*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_319_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_320_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_321_10[_stride_pdfs_0*ctr_0];
+ const double vel0Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3];
+ const double momdensity_0 = vel0Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double vel1Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_3];
+ const double momdensity_1 = vel1Term - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3];
+ const double vel2Term = _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 22*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3];
+ const double delta_rho = vel0Term + vel1Term + vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2];
+ const double momdensity_2 = vel2Term + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 19*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 20*_stride_pdfs_3] + _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 21*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 23*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 24*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 25*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 26*_stride_pdfs_3] - _data_pdfs[_stride_pdfs_0*ctr_0 + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3];
const double rho = delta_rho + 1.0;
const double u_0 = momdensity_0;
const double u_1 = momdensity_1;
const double u_2 = momdensity_2;
- _data_density_20_30_10[_stride_density_0*ctr_0] = rho;
- _data_velocity_20_30_10[_stride_velocity_0*ctr_0] = u_0;
- _data_velocity_20_31_10[_stride_velocity_0*ctr_0] = u_1;
- _data_velocity_20_32_10[_stride_velocity_0*ctr_0] = u_2;
+ _data_density[_stride_density_0*ctr_0 + _stride_density_1*ctr_1 + _stride_density_2*ctr_2] = rho;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2] = u_0;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + _stride_velocity_3] = u_1;
+ _data_velocity[_stride_velocity_0*ctr_0 + _stride_velocity_1*ctr_1 + _stride_velocity_2*ctr_2 + 2*_stride_velocity_3] = u_2;
}
}
}
diff --git a/src/lbm_generated/sweep_collection/D3Q27SRT.h b/src/lbm_generated/sweep_collection/D3Q27SRT.h
index eb45b716..72ba7d41 100644
--- a/src/lbm_generated/sweep_collection/D3Q27SRT.h
+++ b/src/lbm_generated/sweep_collection/D3Q27SRT.h
@@ -59,20 +59,20 @@ namespace lbm {
class D3Q27SRT
{
-public:
- enum Type { ALL = 0, INNER = 1, OUTER = 2 };
+ public:
+ enum Type { ALL = 0, INNER = 1, OUTER = 2 };
D3Q27SRT(const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID pdfsID_, BlockDataID densityID_, BlockDataID velocityID_, double omega, const Cell & outerWidth=Cell(1, 1, 1))
- : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
+ : blocks_(blocks), pdfsID(pdfsID_), densityID(densityID_), velocityID(velocityID_), omega_(omega), outerWidth_(outerWidth)
{
+ validInnerOuterSplit_= true;
+
for (auto& iBlock : *blocks)
{
- if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 ||
- int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 ||
- int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
- WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
+ if (int_c(blocks->getNumberOfXCells(iBlock)) <= outerWidth_[0] * 2 || int_c(blocks->getNumberOfYCells(iBlock)) <= outerWidth_[1] * 2 || int_c(blocks->getNumberOfZCells(iBlock)) <= outerWidth_[2] * 2)
+ validInnerOuterSplit_ = false;
}
};
@@ -117,27 +117,33 @@ class D3Q27SRT
std::function streamCollide(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamCollideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamCollideOuter(block); };
- default:
- return [this](IBlock* block) { streamCollide(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamCollideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamCollideOuter(block); };
+ default:
+ return [this](IBlock* block) { streamCollide(block); };
}
}
std::function streamCollide(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamCollideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamCollideOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamCollideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamCollideOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { streamCollide(block, ghost_layers); };
}
}
@@ -298,14 +304,14 @@ class D3Q27SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamCollideCellInterval(pdfs, pdfs_tmp, omega, ci);
}
-
+
- pdfs->swapDataPointers(pdfs_tmp);
+ pdfs->swapDataPointers(pdfs_tmp);
}
@@ -317,27 +323,33 @@ class D3Q27SRT
std::function collide(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { collideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { collideOuter(block); };
- default:
- return [this](IBlock* block) { collide(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { collideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { collideOuter(block); };
+ default:
+ return [this](IBlock* block) { collide(block); };
}
}
std::function collide(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { collideInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { collideOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { collideInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { collideOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { collide(block, ghost_layers); };
}
}
@@ -425,14 +437,14 @@ class D3Q27SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
collideCellInterval(pdfs, omega, ci);
}
-
+
-
+
}
@@ -443,27 +455,33 @@ class D3Q27SRT
std::function stream(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOuter(block); };
- default:
- return [this](IBlock* block) { stream(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOuter(block); };
+ default:
+ return [this](IBlock* block) { stream(block); };
}
}
std::function stream(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { stream(block, ghost_layers); };
}
}
@@ -624,14 +642,14 @@ class D3Q27SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamCellInterval(pdfs, pdfs_tmp, ci);
}
-
+
- pdfs->swapDataPointers(pdfs_tmp);
+ pdfs->swapDataPointers(pdfs_tmp);
}
@@ -643,27 +661,33 @@ class D3Q27SRT
std::function streamOnlyNoAdvancement(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
- default:
- return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+ default:
+ return [this](IBlock* block) { streamOnlyNoAdvancement(block); };
}
}
std::function streamOnlyNoAdvancement(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { streamOnlyNoAdvancementOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { streamOnlyNoAdvancement(block, ghost_layers); };
}
}
@@ -821,14 +845,14 @@ class D3Q27SRT
layers_.push_back(ci);
}
-
+
for( auto & ci: layers_ )
{
streamOnlyNoAdvancementCellInterval(pdfs, pdfs_tmp, ci);
}
-
+
-
+
}
@@ -839,27 +863,33 @@ class D3Q27SRT
std::function initialise(Type type)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { initialiseInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { initialiseOuter(block); };
- default:
- return [this](IBlock* block) { initialise(block); };
+ case Type::INNER:
+ return [this](IBlock* block) { initialiseInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { initialiseOuter(block); };
+ default:
+ return [this](IBlock* block) { initialise(block); };
}
}
std::function initialise(Type type, const cell_idx_t ghost_layers)
{
+ if (!validInnerOuterSplit_ && type != Type::ALL)
+ WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller, increase cellsPerBlock or avoid communication hiding")
+
switch (type)
{
- case Type::INNER:
- return [this](IBlock* block) { initialiseInner(block); };
- case Type::OUTER:
- return [this](IBlock* block) { initialiseOuter(block); };
- default:
- return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
+ case Type::INNER:
+ return [this](IBlock* block) { initialiseInner(block); };
+ case Type::OUTER:
+ return [this](IBlock* block) { initialiseOuter(block); };
+ default:
+ return [this, ghost_layers](IBlock* block) { initialise(block, ghost_layers); };
}
}
@@ -870,9 +900,9 @@ class D3Q27SRT
const cell_idx_t ghost_layers = 0;
+ auto density = block->getData< field::GhostLayerField