#!/usr/bin/env python3
"""
Backfill historical metrics from Mimir to VictoriaMetrics.

Reads each metric from Mimir with /api/v1/query_range and pushes the samples
to VictoriaMetrics through /api/v1/import/prometheus (Prometheus text
exposition format with explicit millisecond timestamps), which happily accepts
data with any timestamp — no bufferPast gates, no block size hacks.

Usage:
    # Run in-cluster (as a pod, see backfill-pod.yaml)
    python3 backfill.py

    # Or locally with port-forward
    kubectl port-forward -n victoriametrics svc/victoriametrics 8428:8428
    VM_URL=http://localhost:8428 python3 backfill.py
"""

import urllib.request
import urllib.error
import urllib.parse
import json
import ssl
import os
import time
import base64
import sys

# ── Configuration ──────────────────────────────────────────────────

MIMIR_URL = os.environ.get("MIMIR_URL", "https://metrics.vultrlabs.com/prometheus")
MIMIR_USER = os.environ.get("MIMIR_USERNAME", "REPLACE_WITH_MIMIR_USERNAME")
MIMIR_PASS = os.environ.get("MIMIR_PASSWORD", "REPLACE_WITH_MIMIR_PASSWORD")

# SECURITY NOTE: certificate verification for Mimir is OFF by default (the
# historical behaviour of this script) even though Basic-Auth credentials are
# sent over that connection. Set MIMIR_TLS_VERIFY=true to enable verification.
MIMIR_TLS_VERIFY = os.environ.get("MIMIR_TLS_VERIFY", "false").strip().lower() in (
    "1",
    "true",
    "yes",
)

VM_URL = os.environ.get(
    "VM_URL", "http://victoriametrics.victoriametrics.svc.cluster.local:8428"
)

# Time range: March 13, 2026 00:00:00 UTC → now
START_TS = int(os.environ.get("START_TS", "1773360000"))  # 2026-03-13T00:00:00Z
END_TS = int(os.environ.get("END_TS", str(int(time.time()))))
STEP = os.environ.get("STEP", "10s")
CHUNK_HOURS = int(os.environ.get("CHUNK_HOURS", "6"))

# Metrics to backfill
METRICS = [
    "vllm:prompt_tokens_total",
    "vllm:generation_tokens_total",
    "DCGM_FI_DEV_GPU_UTIL",
]

# Extra labels to add to all imported data (e.g. tenant/cluster context)
EXTRA_LABELS = {
    "tenant": "serverless-inference-cluster",
    "cluster": "serverless-inference-cluster",
}


# ── Helpers ────────────────────────────────────────────────────────

def ssl_ctx():
    """Return the SSL context used for Mimir requests.

    Certificate and hostname checks are disabled unless MIMIR_TLS_VERIFY is
    enabled (see the security note in the configuration section).
    """
    ctx = ssl.create_default_context()
    if not MIMIR_TLS_VERIFY:
        # check_hostname must be cleared before verify_mode can be CERT_NONE.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    return ctx


def mimir_query(path):
    """Query the Mimir API with basic auth and return the decoded JSON body.

    Args:
        path: Path plus query string appended to MIMIR_URL.

    Raises:
        urllib.error.URLError: on connection or HTTP-level failure.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    auth = base64.b64encode(f"{MIMIR_USER}:{MIMIR_PASS}".encode()).decode()
    req = urllib.request.Request(f"{MIMIR_URL}{path}")
    req.add_header("Authorization", f"Basic {auth}")
    # The context manager closes the connection even if reading/decoding fails.
    with urllib.request.urlopen(req, context=ssl_ctx(), timeout=300) as resp:
        return json.loads(resp.read().decode())


def vm_import(lines):
    """Push Prometheus text-format lines to VictoriaMetrics.

    The lines go to /api/v1/import/prometheus; plain /api/v1/import expects
    JSON lines and would not parse this payload.

    Args:
        lines: Iterable of "name{labels} value timestamp_ms" strings.

    Returns:
        True if the request was accepted, False on an HTTP or connection
        error (the error is printed).
    """
    data = ("\n".join(lines) + "\n").encode("utf-8")
    req = urllib.request.Request(
        f"{VM_URL}/api/v1/import/prometheus",
        data=data,
        method="POST",
    )
    req.add_header("Content-Type", "text/plain")
    try:
        with urllib.request.urlopen(req, timeout=300):
            return True
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        print(f" VM import ERROR {e.code}: {body}", flush=True)
        return False
    except urllib.error.URLError as e:
        # Connection-level failure (DNS, refused, ...): report it as a failed
        # import so the caller counts the samples of this chunk as lost.
        print(f" VM import ERROR: {e.reason}", flush=True)
        return False


def format_prom_metric_name(raw_name):
    """Return the metric name to use on the VictoriaMetrics side.

    Colons are valid in Prometheus metric names, so the name is returned
    unchanged.
    """
    return raw_name


def escape_label_value(value):
    """Escape a label value for the Prometheus text exposition format.

    Backslash, double quote and newline are the characters that must be
    escaped inside a quoted label value.
    """
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def build_import_lines(series_list, default_name, extra_labels=None):
    """Convert a query_range result into Prometheus text-format lines.

    Args:
        series_list: The "result" list of a matrix response; each item has a
            "metric" label dict and a "values" list of [unix_seconds, "value"].
        default_name: Metric name used when a series carries no __name__.
        extra_labels: Optional labels merged into (and overriding) the series
            labels.

    Returns:
        A list with one "name{labels} value timestamp_ms" line per sample.
        Samples whose value is not parseable as a float are skipped; "NaN",
        "+Inf" and "-Inf" are parseable and are passed through verbatim.
    """
    lines = []
    for series in series_list:
        labels = dict(series.get("metric", {}))
        # __name__ is the metric name, not a regular label.
        metric_name = labels.pop("__name__", default_name)
        if extra_labels:
            labels.update(extra_labels)

        label_str = ",".join(
            f'{key}="{escape_label_value(val)}"'
            for key, val in sorted(labels.items())
        )
        prefix = f"{metric_name}{{{label_str}}}" if label_str else metric_name

        for ts, val_str in series.get("values", []):
            try:
                float(val_str)
            except (ValueError, TypeError):
                continue
            # Seconds → milliseconds; round() avoids losing a millisecond to
            # float truncation on fractional timestamps.
            ts_ms = round(float(ts) * 1000)
            # Exposition format order is: value first, then timestamp.
            lines.append(f"{prefix} {val_str} {ts_ms}")
    return lines


def _fmt_ts(ts):
    """Format a unix timestamp as 'YYYY-MM-DD HH:MM' in UTC."""
    return time.strftime("%Y-%m-%d %H:%M", time.gmtime(ts))


def backfill_metric(metric):
    """Backfill one metric chunk by chunk.

    Returns:
        A (samples_imported, errors) tuple. A failed import adds the number
        of samples in that chunk to errors; any other exception adds 1.
    """
    imported = 0
    errors = 0
    chunk_start = START_TS

    while chunk_start < END_TS:
        chunk_end = min(chunk_start + CHUNK_HOURS * 3600, END_TS)
        # query_range is inclusive on both ends; stop one second short on
        # non-final chunks so the boundary sample is not imported twice (it
        # is the first sample of the next chunk).
        query_end = chunk_end if chunk_end >= END_TS else chunk_end - 1

        chunk_label = f"[{_fmt_ts(chunk_start)} → {_fmt_ts(chunk_end)}]"
        print(f" {chunk_label} ...", end="", flush=True)

        try:
            query = urllib.parse.urlencode(
                {
                    "query": metric,
                    "start": chunk_start,
                    "end": query_end,
                    "step": STEP,
                }
            )
            data = mimir_query(f"/api/v1/query_range?{query}")

            if data.get("status") != "success":
                print(f" Mimir returned status={data.get('status')}", flush=True)
            else:
                series_list = data["data"]["result"]
                if not series_list:
                    print(" no data", flush=True)
                else:
                    import_lines = build_import_lines(
                        series_list, metric, EXTRA_LABELS
                    )
                    chunk_count = len(import_lines)
                    if not import_lines:
                        print(" 0 samples", flush=True)
                    elif vm_import(import_lines):
                        print(f" {chunk_count} samples imported", flush=True)
                        imported += chunk_count
                    else:
                        print(
                            f" IMPORT FAILED ({chunk_count} samples lost)",
                            flush=True,
                        )
                        errors += chunk_count
        except Exception as e:
            # Best effort: report the failed chunk and carry on with the next.
            print(f" ERROR: {e}", flush=True)
            errors += 1

        chunk_start = chunk_end

    return imported, errors


# ── Main ───────────────────────────────────────────────────────────

def main():
    """Run the backfill for every configured metric and print a summary."""
    if CHUNK_HOURS <= 0:
        # A non-positive chunk size would never advance the chunk loop.
        sys.exit("CHUNK_HOURS must be a positive integer")

    print("VictoriaMetrics Backfill", flush=True)
    print("========================", flush=True)
    print(f"Source: {MIMIR_URL}", flush=True)
    print(f"Target: {VM_URL}", flush=True)
    print(f"Range: {START_TS} → {END_TS} ({CHUNK_HOURS}h chunks)", flush=True)
    print(f"Metrics: {', '.join(METRICS)}", flush=True)
    print(f"Extra labels: {EXTRA_LABELS}", flush=True)
    print(flush=True)

    total_samples = 0
    total_errors = 0

    for metric in METRICS:
        print(f"\n{'=' * 60}", flush=True)
        print(f"Metric: {metric}", flush=True)
        print(f"{'=' * 60}", flush=True)

        metric_samples, metric_errors = backfill_metric(metric)
        print(f" Total for {metric}: {metric_samples} samples", flush=True)
        total_samples += metric_samples
        total_errors += metric_errors

    print(f"\n{'=' * 60}", flush=True)
    print("BACKFILL COMPLETE", flush=True)
    print(f"Total samples imported: {total_samples}", flush=True)
    print(f"Total errors: {total_errors}", flush=True)
    print(f"{'=' * 60}", flush=True)

    # Verify by querying VM
    print("\nVerifying import...", flush=True)
    try:
        verify_path = f"/api/v1/query?query={urllib.parse.quote('count(up)')}"
        req = urllib.request.Request(f"{VM_URL}{verify_path}")
        with urllib.request.urlopen(req, timeout=30):
            pass
        print("VM is responding to queries ✓", flush=True)
    except Exception as e:
        print(f"VM query check failed: {e}", flush=True)


if __name__ == "__main__":
    main()