Add VictoriaMetrics for historical metrics (Mar 13+)

- Single-node VM deployment with 200Gi NVMe, 2y retention
- Traefik IngressRoute at vm.vultrlabs.dev (TLS + basic auth)
- Backfill script: pulls vLLM/DCGM metrics from Mimir, writes to VM
- Retain StorageClass so historical data survives PVC deletion
- README with deployment + Grafana mixed-datasource instructions
This commit is contained in:
2026-04-09 19:29:18 +00:00
parent 7ade5ecac8
commit bf6d62b9a8
10 changed files with 690 additions and 0 deletions

View File

@@ -0,0 +1,105 @@
##############################################################################
# VictoriaMetrics Single-Node Deployment
# Stores historical metrics from Mimir (Mar 13present) for Grafana queries
##############################################################################
# Deployment: runs a single VictoriaMetrics pod that stores its data on the
# "victoriametrics-data" PersistentVolumeClaim and serves HTTP on port 8428.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: victoriametrics
  namespace: victoriametrics
  labels:
    app.kubernetes.io/name: victoriametrics
spec:
  replicas: 1
  # The data PVC is ReadWriteOnce, so a rolling update would start the
  # replacement pod while the old one still holds the volume. Recreate
  # stops the old pod first, so only one pod ever uses the data directory.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: victoriametrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: victoriametrics
      annotations:
        # Annotation-based scrape discovery; values are quoted because
        # annotation values must be strings.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8428"
    spec:
      securityContext:
        # Mounted volumes are made group-accessible to GID 65534.
        fsGroup: 65534
      containers:
        - name: victoriametrics
          image: victoriametrics/victoria-metrics:v1.115.0
          args:
            - "-storageDataPath=/data"
            # Keep historical data for 2 years
            - "-retentionPeriod=2y"
            - "-httpListenAddr=:8428"
            # Long-running queries OK for historical
            - "-search.maxQueryDuration=120s"
            # High limit for wide historical queries
            - "-search.maxSamplesPerQuery=100000000"
            # Memory budget (half of the 8Gi container limit below)
            - "-memory.allowedBytes=4GB"
            # Allow high cardinality
            - "-search.maxUniqueTimeseries=5000000"
          ports:
            - name: http
              containerPort: 8428
          volumeMounts:
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: "2"
              memory: 4Gi
            limits:
              cpu: "4"
              memory: 8Gi
          # Gives the process up to 10 minutes (60 x 10s) to start answering
          # /health before the liveness probe takes over, so a slow start on
          # a large data volume does not end in a restart loop.
          startupProbe:
            httpGet:
              path: /health
              port: http
            periodSeconds: 10
            failureThreshold: 60
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 15
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 5
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: victoriametrics-data
---
##############################################################################
# PVC — Vultr Block Storage for VictoriaMetrics data
##############################################################################
# Claim for the volume that the victoriametrics Deployment mounts at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: victoriametrics
  name: victoriametrics-data
spec:
  # Single-node attachment: one node may mount the volume read-write.
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 200Gi
  # NOTE(review): this StorageClass is defined outside this file; the commit
  # description says it uses a Retain reclaim policy — confirm there.
  storageClassName: vultr-block-storage-vm
---
##############################################################################
# Service — ClusterIP (Traefik handles external access)
##############################################################################
# In-cluster endpoint for the VictoriaMetrics pod(s) selected by label.
apiVersion: v1
kind: Service
metadata:
  namespace: victoriametrics
  name: victoriametrics
  labels:
    app.kubernetes.io/name: victoriametrics
spec:
  # ClusterIP is the API default; spelled out to match the header comment.
  type: ClusterIP
  ports:
    - name: http
      port: 8428
      # Resolves to the container port named "http" on the selected pods.
      targetPort: http
  selector:
    app.kubernetes.io/name: victoriametrics