* Add more GPU architectures support * Update layout.py * Optimize performance, Add SM90 support, Add 1D2D SM100 support * Add fmtlib submodule at commit 553ec11 --------- Co-authored-by: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
32 lines
830 B
C++
32 lines
830 B
C++
#pragma once
|
|
|
|
#include <filesystem>
|
|
#include <memory>
|
|
#include <unordered_map>
|
|
|
|
#include "kernel_runtime.hpp"
|
|
|
|
namespace deep_gemm {
|
|
|
|
class KernelRuntimeCache {
|
|
std::unordered_map<std::filesystem::path, std::shared_ptr<KernelRuntime>> cache;
|
|
|
|
public:
|
|
// TODO: consider cache capacity
|
|
KernelRuntimeCache() = default;
|
|
|
|
std::shared_ptr<KernelRuntime> get(const std::filesystem::path& dir_path) {
|
|
// Hit the runtime cache
|
|
if (const auto& iterator = cache.find(dir_path); iterator != cache.end())
|
|
return iterator->second;
|
|
|
|
if (KernelRuntime::check_validity(dir_path))
|
|
return cache[dir_path] = std::make_shared<KernelRuntime>(dir_path);
|
|
return nullptr;
|
|
}
|
|
};
|
|
|
|
static auto kernel_runtime_cache = std::make_shared<KernelRuntimeCache>();
|
|
|
|
} // namespace deep_gemm
|