blob: 60748cc7d8adc5252320f38803530df245c0cd1c [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
// Copyright (C) 2014 Benoit Steiner <>
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at
#if defined(EIGEN_USE_LIBXSMM)
#include "third_party/libxsmm/include/libxsmm.h"
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
enum {
ShardByRow = 0,
ShardByCol = 1
// Default Blocking Strategy
template<typename ResScalar, typename LhsScalar, typename RhsScalar, typename StorageIndex, int ShardingType = ShardByCol>
class TensorContractionBlocking {
adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h`
requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h`
which in turn, requires adding EIGEN_DEVICE_FUNC to `evaluateProductBlockingSizesHeuristic` in `GeneralBlockPanelKernel.h`
which in turn, requires adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h`
(else HIPCC will error out)
However adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h`
results in NVCC erroring out with the following error
../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901:
dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function
#if !defined(EIGEN_HIPCC)
TensorContractionBlocking(StorageIndex k, StorageIndex m, StorageIndex n, StorageIndex num_threads = 1) :
kc_(k), mc_(m), nc_(n)
if (ShardingType == ShardByCol) {
computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads);
else {
computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads);
const int rhs_packet_size = internal::packet_traits<RhsScalar>::size;
kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ?
kc_ : (kc_ / rhs_packet_size) * rhs_packet_size;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex mc() const { return mc_; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex nc() const { return nc_; }
StorageIndex kc_;
StorageIndex mc_;
StorageIndex nc_;
} // end namespace internal
} // end namespace Eigen