- Add support for legacy CUDA versions; now compatible with CUDA 12.3 and newer - Add support for NVRTC compilation - Other fixes and code refactoring
32 lines
820 B
C++
32 lines
820 B
C++
#pragma once
|
|
|
|
#include <filesystem>
|
|
#include <memory>
|
|
#include <unordered_map>
|
|
|
|
#include "kernel_runtime.hpp"
|
|
|
|
namespace deep_gemm {
|
|
|
|
class KernelRuntimeCache {
|
|
std::unordered_map<std::string, std::shared_ptr<KernelRuntime>> cache;
|
|
|
|
public:
|
|
// TODO: consider cache capacity
|
|
KernelRuntimeCache() = default;
|
|
|
|
std::shared_ptr<KernelRuntime> get(const std::filesystem::path& dir_path) {
|
|
// Hit the runtime cache
|
|
if (const auto& iterator = cache.find(dir_path); iterator != cache.end())
|
|
return iterator->second;
|
|
|
|
if (KernelRuntime::check_validity(dir_path))
|
|
return cache[dir_path] = std::make_shared<KernelRuntime>(dir_path);
|
|
return nullptr;
|
|
}
|
|
};
|
|
|
|
static auto kernel_runtime_cache = std::make_shared<KernelRuntimeCache>();
|
|
|
|
} // namespace deep_gemm
|