Skip to content

Commit

Permalink
refine code
Browse files (browse the repository at this point in the history)
  • Loading branch information
taozha2 committed Jan 10, 2025
1 parent b8586e6 commit 72e8400
Show file tree
Hide file tree
Showing 12 changed files with 249 additions and 176 deletions.
6 changes: 5 additions & 1 deletion include/cute/arch/xe_copy_2B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,8 @@ struct XE_2D_U16x16x8_LD_T {
using BlockShape = Shape<_8, _16>;
using inst_dtype = uint32_t;

static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -750,8 +752,10 @@ struct XE_2D_U16x16x8_LD_T {

struct XE_2D_U16x16x16_LD_T {
using BlockShape = Shape<_16, _16>;

using inst_dtype = uint32_t;

static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down
8 changes: 8 additions & 0 deletions include/cute/arch/xe_copy_4B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,8 @@ struct XE_2D_TF32x32x16_LD_N {


struct XE_2D_U32x16x1_LD_T {
static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -617,6 +619,8 @@ struct XE_2D_U32x16x1_LD_T {
struct XE_2D_U32x16x2_LD_T {
using BlockShape = Shape<_2, _16>;

static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -635,6 +639,8 @@ struct XE_2D_U32x16x2_LD_T {
struct XE_2D_U32x16x4_LD_T {
using BlockShape = Shape<_4, _16>;

static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -653,6 +659,8 @@ struct XE_2D_U32x16x4_LD_T {
struct XE_2D_U32x16x8_LD_T {
using BlockShape = Shape<_8, _16>;

static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down
6 changes: 6 additions & 0 deletions include/cute/arch/xe_copy_8B.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ SYCL_DEVICE_OCL(intel::ulong4 intel_sub_group_block_read_transpose_64b_8r4c(
namespace cute
{
struct XE_2D_U64x8x1_LD_T {
static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -97,6 +99,8 @@ struct XE_2D_U64x8x1_LD_T {
};

struct XE_2D_U64x8x2_LD_T {
static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -113,6 +117,8 @@ struct XE_2D_U64x8x2_LD_T {
};

struct XE_2D_U64x8x4_LD_T {
static constexpr bool is_transpose = true;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down
Loading

0 comments on commit 72e8400

Please sign in to comment.