commit ac13c30905
Date: 2026-03-31 08:28:16 -04:00

    init commit

9 changed files with 1028 additions and 0 deletions

00-namespace.yaml

@@ -0,0 +1,13 @@
##############################################################################
# M3DB on Vultr Kubernetes Engine
# Replaces Mimir for long-term metrics storage
# Uses Vultr Block Storage CSI for persistent volumes
##############################################################################
apiVersion: v1
kind: Namespace
metadata:
name: m3db
labels:
app.kubernetes.io/name: m3db
app.kubernetes.io/part-of: metrics-platform

01-storageclass.yaml

@@ -0,0 +1,15 @@
##############################################################################
# StorageClass — Vultr Block Storage CSI
# Uses Vultr's CSI driver (block.csi.vultr.com) for dynamic provisioning
##############################################################################
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: vultr-block-storage-m3db
provisioner: block.csi.vultr.com
parameters:
block_type: "high_perf" # high_perf = NVMe-backed block storage
reclaimPolicy: Retain # Retain data on PVC deletion (safety)
allowVolumeExpansion: true # Allow online volume resizing
volumeBindingMode: WaitForFirstConsumer

02-etcd.yaml

@@ -0,0 +1,122 @@
##############################################################################
# etcd cluster for M3DB placement & topology
# M3DB requires etcd for cluster coordination
##############################################################################
apiVersion: v1
kind: Service
metadata:
name: etcd
namespace: m3db
labels:
app.kubernetes.io/name: etcd
app.kubernetes.io/part-of: m3db
spec:
clusterIP: None
ports:
- name: client
port: 2379
targetPort: 2379
- name: peer
port: 2380
targetPort: 2380
selector:
app.kubernetes.io/name: etcd
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
namespace: m3db
labels:
app.kubernetes.io/name: etcd
app.kubernetes.io/part-of: m3db
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app.kubernetes.io/name: etcd
template:
metadata:
labels:
app.kubernetes.io/name: etcd
app.kubernetes.io/part-of: m3db
spec:
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- etcd
topologyKey: kubernetes.io/hostname
containers:
- name: etcd
image: quay.io/coreos/etcd:v3.5.15
ports:
- containerPort: 2379
name: client
- containerPort: 2380
name: peer
env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: CLUSTER_SIZE
value: "3"
command:
- /bin/sh
- -ec
- |
PEERS=""
for i in $(seq 0 $((${CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}etcd-${i}=http://etcd-${i}.etcd.m3db.svc.cluster.local:2380"
done
exec etcd \
--name=${POD_NAME} \
--listen-peer-urls=http://0.0.0.0:2380 \
--listen-client-urls=http://0.0.0.0:2379 \
--advertise-client-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2379 \
--initial-advertise-peer-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2380 \
--initial-cluster=${PEERS} \
--initial-cluster-state=new \
--data-dir=/var/lib/etcd/data \
--auto-compaction-retention=1
volumeMounts:
- name: etcd-data
mountPath: /var/lib/etcd
resources:
requests:
cpu: 200m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
httpGet:
path: /health
port: 2379
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 2379
initialDelaySeconds: 5
periodSeconds: 5
volumeClaimTemplates:
- metadata:
name: etcd-data
spec:
storageClassName: vultr-block-storage-m3db
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 10Gi
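Once all three pods are Running, a quick quorum check can be done with the `etcdctl` binary shipped in the image — a minimal sketch, assuming the pod and Service names from the StatefulSet above (the short `etcd-N.etcd` names resolve via the pod's `m3db.svc` search domain):

```bash
# Each endpoint should report "is healthy"
kubectl -n m3db exec etcd-0 -- etcdctl \
  --endpoints=http://etcd-0.etcd:2379,http://etcd-1.etcd:2379,http://etcd-2.etcd:2379 \
  endpoint health
```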

03-configmaps.yaml

@@ -0,0 +1,267 @@
##############################################################################
# M3DB Configuration
# Tuned for replacing Mimir — supports Prometheus remote write/read
##############################################################################
apiVersion: v1
kind: ConfigMap
metadata:
name: m3db-config
namespace: m3db
labels:
app.kubernetes.io/name: m3db
data:
m3dbnode.yml: |
coordinator:
listenAddress: 0.0.0.0:7201
metrics:
scope:
prefix: coordinator
prometheus:
handlerPath: /metrics
sanitization: prometheus
samplingRate: 1.0
extended: none
# Prometheus remote write/read endpoints (Mimir replacement)
tagOptions:
idScheme: quoted
db:
logging:
level: info
metrics:
prometheus:
handlerPath: /metrics
sanitization: prometheus
samplingRate: 1.0
extended: detailed
listenAddress: 0.0.0.0:9000
clusterListenAddress: 0.0.0.0:9001
httpNodeListenAddress: 0.0.0.0:9002
httpClusterListenAddress: 0.0.0.0:9003
debugListenAddress: 0.0.0.0:9004
hostID:
resolver: hostname
client:
writeConsistencyLevel: majority
readConsistencyLevel: unstrict_majority
writeTimeout: 10s
fetchTimeout: 15s
connectTimeout: 20s
writeRetry:
initialBackoff: 500ms
backoffFactor: 3
maxRetries: 2
jitter: true
fetchRetry:
initialBackoff: 500ms
backoffFactor: 2
maxRetries: 3
jitter: true
# Cluster discovery via etcd
discovery:
config:
service:
env: default_env
zone: embedded
service: m3db
cacheDir: /var/lib/m3kv
etcdClusters:
- zone: embedded
endpoints:
- http://etcd-0.etcd.m3db.svc.cluster.local:2379
- http://etcd-1.etcd.m3db.svc.cluster.local:2379
- http://etcd-2.etcd.m3db.svc.cluster.local:2379
# Cache configuration
cache:
series:
policy: lru
postingsList:
size: 262144
# Commit log
commitlog:
flushMaxBytes: 524288
flushEvery: 1s
queue:
calculationType: fixed
size: 2097152
# Filesystem (data persistence)
filesystem:
filePathPrefix: /var/lib/m3db
writeBufferSize: 65536
dataReadBufferSize: 65536
infoReadBufferSize: 128
seekReadBufferSize: 4096
throughputLimitMbps: 1000.0
throughputCheckEvery: 128
# Repair disabled by default — enable once cluster is stable
repair:
enabled: false
# Pooling for performance
pooling:
blockAllocSize: 16
type: simple
seriesPool:
size: 262144
lowWatermark: 0.7
highWatermark: 1.0
blockPool:
size: 262144
lowWatermark: 0.7
highWatermark: 1.0
encoderPool:
size: 262144
lowWatermark: 0.7
highWatermark: 1.0
segmentReaderPool:
size: 16384
lowWatermark: 0.2
highWatermark: 1.0
iteratorPool:
size: 2048
lowWatermark: 0.2
highWatermark: 1.0
fetchBlockMetadataResultsPool:
size: 65536
capacity: 32
lowWatermark: 0.01
highWatermark: 1.0
fetchBlocksMetadataResultsPool:
size: 32
capacity: 4096
lowWatermark: 0.01
highWatermark: 1.0
bytesPool:
buckets:
- capacity: 16
size: 524288
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 32
size: 262144
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 64
size: 131072
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 128
size: 65536
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 256
size: 65536
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 1440
size: 16384
lowWatermark: 0.01
highWatermark: 1.0
- capacity: 4096
size: 8192
lowWatermark: 0.01
highWatermark: 1.0
---
##############################################################################
# M3 Coordinator standalone config
# Handles Prometheus remote read/write + Grafana queries
##############################################################################
apiVersion: v1
kind: ConfigMap
metadata:
name: m3coordinator-config
namespace: m3db
labels:
app.kubernetes.io/name: m3coordinator
data:
m3coordinator.yml: |
listenAddress: 0.0.0.0:7201
logging:
level: info
metrics:
scope:
prefix: coordinator
prometheus:
handlerPath: /metrics
sanitization: prometheus
samplingRate: 1.0
tagOptions:
idScheme: quoted
clusters:
- namespaces:
- namespace: default
type: unaggregated
retention: 48h
- namespace: agg_10s_30d
type: aggregated
retention: 720h
resolution: 10s
- namespace: agg_1m_1y
type: aggregated
retention: 8760h
resolution: 1m
client:
config:
service:
env: default_env
zone: embedded
service: m3db
cacheDir: /var/lib/m3kv
etcdClusters:
- zone: embedded
endpoints:
- http://etcd-0.etcd.m3db.svc.cluster.local:2379
- http://etcd-1.etcd.m3db.svc.cluster.local:2379
- http://etcd-2.etcd.m3db.svc.cluster.local:2379
writeConsistencyLevel: majority
readConsistencyLevel: unstrict_majority
# Downsample configuration
downsample:
rules:
mappingRules:
- name: "10s for 30 days"
filter: "__name__:*"
aggregations: ["Last"]
storagePolicies:
- resolution: 10s
retention: 720h
- name: "1m for 1 year"
filter: "__name__:*"
aggregations: ["Last"]
storagePolicies:
- resolution: 1m
retention: 8760h
# Ingest — Prometheus remote write
ingest:
ingester:
workerPoolSize: 10000
opPool:
size: 10000
m3msg:
server:
listenAddress: 0.0.0.0:7507
# Carbon ingestion disabled (uncomment if needed)
# carbon:
# ingester:
# listenAddress: "0.0.0.0:7204"
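The `clusters.namespaces` list in the coordinator config must match the namespaces the init job (file 06) later creates, or queries will fail. A quick way to compare them once the coordinator is port-forwarded as in the README — a sketch assuming the response shape of M3's namespace API:

```bash
# Should list exactly: "agg_10s_30d", "agg_1m_1y", "default"
curl -s http://localhost:7201/api/v1/services/m3db/namespace | jq '.registry.namespaces | keys'
```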

04-m3dbnode.yaml

@@ -0,0 +1,156 @@
##############################################################################
# M3DB Node — Headless Service (for StatefulSet DNS)
##############################################################################
apiVersion: v1
kind: Service
metadata:
name: m3dbnode
namespace: m3db
labels:
app.kubernetes.io/name: m3dbnode
app.kubernetes.io/part-of: m3db
spec:
clusterIP: None
ports:
- name: client
port: 9000
targetPort: 9000
- name: cluster
port: 9001
targetPort: 9001
- name: http-node
port: 9002
targetPort: 9002
- name: http-cluster
port: 9003
targetPort: 9003
- name: debug
port: 9004
targetPort: 9004
- name: coordinator
port: 7201
targetPort: 7201
selector:
app.kubernetes.io/name: m3dbnode
---
##############################################################################
# M3DB Node StatefulSet
# 3 replicas — one per availability zone / node for HA
##############################################################################
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: m3dbnode
namespace: m3db
labels:
app.kubernetes.io/name: m3dbnode
app.kubernetes.io/part-of: m3db
spec:
serviceName: m3dbnode
replicas: 3
podManagementPolicy: Parallel
selector:
matchLabels:
app.kubernetes.io/name: m3dbnode
template:
metadata:
labels:
app.kubernetes.io/name: m3dbnode
app.kubernetes.io/part-of: m3db
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "7203"
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- m3dbnode
topologyKey: kubernetes.io/hostname
securityContext:
fsGroup: 65534
terminationGracePeriodSeconds: 120
containers:
- name: m3dbnode
image: quay.io/m3db/m3dbnode:v1.5.0
imagePullPolicy: IfNotPresent
args:
- "-f"
- "/etc/m3db/m3dbnode.yml"
ports:
- containerPort: 9000
name: client
- containerPort: 9001
name: cluster
- containerPort: 9002
name: http-node
- containerPort: 9003
name: http-cluster
- containerPort: 9004
name: debug
- containerPort: 7201
name: coordinator
- containerPort: 7203
name: metrics
volumeMounts:
- name: m3db-data
mountPath: /var/lib/m3db
- name: m3db-config
mountPath: /etc/m3db
- name: cache-dir
mountPath: /var/lib/m3kv
resources:
requests:
cpu: "1"
memory: 4Gi
limits:
cpu: "2"
memory: 8Gi
livenessProbe:
httpGet:
path: /health
port: 9002
initialDelaySeconds: 60
periodSeconds: 15
timeoutSeconds: 5
failureThreshold: 5
readinessProbe:
httpGet:
path: /health
port: 9002
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- "sleep 30" # allow in-flight writes to drain
volumes:
- name: m3db-config
configMap:
name: m3db-config
- name: cache-dir
emptyDir: {}
volumeClaimTemplates:
- metadata:
name: m3db-data
spec:
storageClassName: vultr-block-storage-m3db
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 100Gi # Adjust based on retention & cardinality
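After the init job (file 06) has created the placement, each dbnode should report healthy and, once bootstrap finishes, ready — a minimal smoke check against the node HTTP API (port 9002, the same endpoint the probes above use):

```bash
kubectl -n m3db port-forward pod/m3dbnode-0 9002:9002 &
curl -s http://localhost:9002/health
```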

05-m3coordinator.yaml

@@ -0,0 +1,117 @@
##############################################################################
# M3 Coordinator — Deployment
# Stateless query/write layer — Prometheus remote_write & remote_read target
# This is what Grafana and Prometheus talk to (replaces Mimir endpoints)
##############################################################################
apiVersion: apps/v1
kind: Deployment
metadata:
name: m3coordinator
namespace: m3db
labels:
app.kubernetes.io/name: m3coordinator
app.kubernetes.io/part-of: m3db
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: m3coordinator
template:
metadata:
labels:
app.kubernetes.io/name: m3coordinator
app.kubernetes.io/part-of: m3db
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "7203"
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: In
values:
- m3coordinator
topologyKey: kubernetes.io/hostname
containers:
- name: m3coordinator
image: quay.io/m3db/m3coordinator:v1.5.0
imagePullPolicy: IfNotPresent
args:
- "-f"
- "/etc/m3coordinator/m3coordinator.yml"
ports:
- containerPort: 7201
name: api
protocol: TCP
- containerPort: 7203
name: metrics
protocol: TCP
volumeMounts:
- name: config
mountPath: /etc/m3coordinator
- name: cache-dir
mountPath: /var/lib/m3kv
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: "1"
memory: 2Gi
livenessProbe:
httpGet:
path: /health
port: 7201
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 7201
initialDelaySeconds: 10
periodSeconds: 5
volumes:
- name: config
configMap:
name: m3coordinator-config
- name: cache-dir
emptyDir: {}
---
##############################################################################
# M3 Coordinator Service
# Endpoints for Prometheus remote_write / remote_read / Grafana
#
# remote_write → http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/write
# remote_read → http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/read
# query (Grafana Prometheus datasource) → http://m3coordinator.m3db.svc.cluster.local:7201
##############################################################################
apiVersion: v1
kind: Service
metadata:
name: m3coordinator
namespace: m3db
labels:
app.kubernetes.io/name: m3coordinator
app.kubernetes.io/part-of: m3db
spec:
type: ClusterIP
ports:
- name: api
port: 7201
targetPort: 7201
protocol: TCP
- name: metrics
port: 7203
targetPort: 7203
protocol: TCP
selector:
app.kubernetes.io/name: m3coordinator
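For an end-to-end check from inside the cluster, a throwaway curl pod against this Service works — a sketch reusing the image tag from the init job; expect actual results only after the init job has created the placement and namespaces:

```bash
kubectl -n m3db run curl-test --rm -it --restart=Never \
  --image=curlimages/curl:8.7.1 -- \
  curl -s "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/query?query=up"
```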

06-init-and-pdb.yaml

@@ -0,0 +1,216 @@
##############################################################################
# PodDisruptionBudgets — keep quorum during rolling updates
##############################################################################
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: m3dbnode-pdb
namespace: m3db
spec:
minAvailable: 2
selector:
matchLabels:
app.kubernetes.io/name: m3dbnode
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: etcd-pdb
namespace: m3db
spec:
minAvailable: 2
selector:
matchLabels:
app.kubernetes.io/name: etcd
---
##############################################################################
# Cluster Init Job
# Run ONCE after all m3dbnode pods are Ready to:
# 1. Create the placement (topology)
# 2. Create the namespaces (retention policies)
# 3. Wait for the cluster to initialize
#
# kubectl apply -f 06-init-and-pdb.yaml
# (then monitor with: kubectl logs -n m3db job/m3db-cluster-init)
##############################################################################
apiVersion: batch/v1
kind: Job
metadata:
name: m3db-cluster-init
namespace: m3db
labels:
app.kubernetes.io/name: m3db-init
app.kubernetes.io/part-of: m3db
spec:
backoffLimit: 5
ttlSecondsAfterFinished: 3600
template:
spec:
restartPolicy: OnFailure
containers:
- name: init
image: curlimages/curl:8.7.1
command:
- /bin/sh
- -exc
- |
COORD="http://m3coordinator.m3db.svc.cluster.local:7201"
echo "=== Waiting for coordinator to be healthy ==="
until curl -sf "${COORD}/health"; do
echo "Coordinator not ready yet, retrying in 5s..."
sleep 5
done
echo ""
echo "=== Creating M3DB placement ==="
curl -sSf -X POST "${COORD}/api/v1/services/m3db/placement/init" \
-H "Content-Type: application/json" \
-d '{
"num_shards": 64,
"replication_factor": 3,
"instances": [
{
"id": "m3dbnode-0",
"isolation_group": "zone-a",
"zone": "embedded",
"weight": 100,
"endpoint": "m3dbnode-0.m3dbnode.m3db.svc.cluster.local:9000",
"hostname": "m3dbnode-0",
"port": 9000
},
{
"id": "m3dbnode-1",
"isolation_group": "zone-b",
"zone": "embedded",
"weight": 100,
"endpoint": "m3dbnode-1.m3dbnode.m3db.svc.cluster.local:9000",
"hostname": "m3dbnode-1",
"port": 9000
},
{
"id": "m3dbnode-2",
"isolation_group": "zone-c",
"zone": "embedded",
"weight": 100,
"endpoint": "m3dbnode-2.m3dbnode.m3db.svc.cluster.local:9000",
"hostname": "m3dbnode-2",
"port": 9000
}
]
}'
echo ""
echo "=== Creating unaggregated namespace (48h retention) ==="
curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
-H "Content-Type: application/json" \
-d '{
"name": "default",
"options": {
"bootstrapEnabled": true,
"flushEnabled": true,
"writesToCommitLog": true,
"cleanupEnabled": true,
"snapshotEnabled": true,
"repairEnabled": false,
"retentionOptions": {
"retentionPeriodDuration": "48h",
"blockSizeDuration": "2h",
"bufferFutureDuration": "10m",
"bufferPastDuration": "10m"
},
"indexOptions": {
"enabled": true,
"blockSizeDuration": "2h"
}
}
}'
echo ""
echo "=== Creating aggregated namespace: 10s resolution, 30d retention ==="
curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
-H "Content-Type: application/json" \
-d '{
"name": "agg_10s_30d",
"options": {
"bootstrapEnabled": true,
"flushEnabled": true,
"writesToCommitLog": true,
"cleanupEnabled": true,
"snapshotEnabled": true,
"retentionOptions": {
"retentionPeriodDuration": "720h",
"blockSizeDuration": "12h",
"bufferFutureDuration": "10m",
"bufferPastDuration": "10m"
},
"indexOptions": {
"enabled": true,
"blockSizeDuration": "12h"
},
"aggregationOptions": {
"aggregations": [
{
"aggregated": true,
"attributes": {
"resolutionDuration": "10s"
}
}
]
}
}
}'
echo ""
echo "=== Creating aggregated namespace: 1m resolution, 1y retention ==="
curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
-H "Content-Type: application/json" \
-d '{
"name": "agg_1m_1y",
"options": {
"bootstrapEnabled": true,
"flushEnabled": true,
"writesToCommitLog": true,
"cleanupEnabled": true,
"snapshotEnabled": true,
"retentionOptions": {
"retentionPeriodDuration": "8760h",
"blockSizeDuration": "24h",
"bufferFutureDuration": "10m",
"bufferPastDuration": "10m"
},
"indexOptions": {
"enabled": true,
"blockSizeDuration": "24h"
},
"aggregationOptions": {
"aggregations": [
{
"aggregated": true,
"attributes": {
"resolutionDuration": "1m"
}
}
]
}
}
}'
echo ""
echo "=== Waiting for namespace initialization ==="
sleep 10
curl -sSf "${COORD}/api/v1/services/m3db/namespace/ready" \
-H "Content-Type: application/json" \
-d '{ "name": "default" }' || echo "Namespace not ready yet — this is normal, bootstrapping takes a few minutes."
echo ""
echo "=== M3DB cluster initialization complete ==="
echo "Prometheus remote_write → ${COORD}/api/v1/prom/remote/write"
echo "Prometheus remote_read → ${COORD}/api/v1/prom/remote/read"
echo "PromQL queries → ${COORD}/api/v1/query"

README.md

@@ -0,0 +1,111 @@
# M3DB on Vultr Kubernetes Engine

Drop-in Mimir replacement using M3DB for long-term Prometheus metrics storage, deployed on Vultr VKE with Vultr Block Storage CSI.

## Architecture

```
Prometheus ──remote_write──▶ ┌──────────────────┐
Grafana ───PromQL query────▶ │  M3 Coordinator  │  (Deployment, 2 replicas)
                             └────────┬─────────┘
                                      │
                             ┌────────┴─────────┐
                             │    M3DB Nodes    │  (StatefulSet, 3 replicas)
                             │   Vultr Block    │  (100Gi SSD per node)
                             │     Storage      │
                             └────────┬─────────┘
                                      │
                                etcd cluster      (StatefulSet, 3 replicas)
```
## Retention Tiers
| Namespace | Resolution | Retention | Use Case |
|----------------|-----------|-----------|---------------------------|
| `default` | raw | 48h | Real-time queries |
| `agg_10s_30d` | 10s | 30 days | Recent dashboards |
| `agg_1m_1y` | 1m | 1 year | Long-term trends/capacity |
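The coordinator fans queries out across these namespaces automatically. To pin a query to a single tier, M3 accepts restrict headers — a sketch using the header names from the M3 query docs (verify against your M3 version; the storage policy must match a namespace above):

```bash
curl -H "M3-Metrics-Type: aggregated" \
     -H "M3-Storage-Policy: 1m:8760h" \
     "http://localhost:7201/api/v1/query?query=up"
```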
## Deployment

```bash
# 1. Apply everything — the init job will fail and retry until the dbnode
#    and coordinator pods it depends on are Ready; that's expected
kubectl apply -k .
# 2. Wait for all pods to be Ready
kubectl -n m3db get pods -w
# 3. Once all m3dbnode and m3coordinator pods are Running, the init job
# will bootstrap the cluster (placement + namespaces).
# Monitor it:
kubectl -n m3db logs -f job/m3db-cluster-init
# 4. Verify cluster health
kubectl -n m3db port-forward svc/m3coordinator 7201:7201
curl http://localhost:7201/api/v1/services/m3db/placement
curl http://localhost:7201/api/v1/services/m3db/namespace
```
## Prometheus Configuration (Replacing Mimir)
Update your Prometheus config to point at M3 Coordinator instead of Mimir:
```yaml
# prometheus.yml
remote_write:
- url: "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/write"
queue_config:
capacity: 10000
max_shards: 30
max_samples_per_send: 5000
batch_send_deadline: 5s
remote_read:
- url: "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/read"
read_recent: true
```
## Grafana Datasource
Add a **Prometheus** datasource in Grafana pointing to:
```
http://m3coordinator.m3db.svc.cluster.local:7201
```
Existing PromQL dashboards should work without modification.
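If you provision Grafana datasources from files, a minimal sketch (the file path and datasource name are illustrative):

```yaml
# grafana/provisioning/datasources/m3db.yaml
apiVersion: 1
datasources:
  - name: M3DB
    type: prometheus
    access: proxy
    url: http://m3coordinator.m3db.svc.cluster.local:7201
```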
## Migration from Mimir
1. **Dual-write phase**: Configure Prometheus to remote_write to both Mimir and M3DB simultaneously (see the sketch after this list).
2. **Validation**: Compare query results between Mimir and M3DB for the same time ranges.
3. **Cutover**: Once retention in M3DB covers your needs, remove the Mimir remote_write target.
4. **Cleanup**: Decommission Mimir components.
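For step 1, dual writing is just a second `remote_write` target in Prometheus — a sketch (the Mimir URL is illustrative; keep whichever endpoint you use today):

```yaml
remote_write:
  - url: "http://mimir-nginx.mimir.svc.cluster.local/api/v1/push"  # existing Mimir target (illustrative)
  - url: "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/write"
```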
## Tuning for Vultr
- **Storage**: The `vultr-block-storage-m3db` StorageClass uses `high_perf` (NVMe SSD). Adjust `storage` in the VolumeClaimTemplates based on your cardinality and retention.
- **Node sizing**: M3DB is memory-hungry; we recommend Vultr nodes with at least 8 GB RAM. The manifest requests 4Gi (limit 8Gi) per m3dbnode pod.
- **Shards**: The init job creates 64 shards across 3 nodes. For higher cardinality, increase to 128 or 256.
- **Volume expansion**: The StorageClass sets `allowVolumeExpansion: true`, so you can resize PVCs online (see the example after this list).
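For the volume-expansion point above, a concrete example — PVC names follow the `<template>-<statefulset>-<ordinal>` convention, so this grows the first dbnode's volume:

```bash
kubectl -n m3db patch pvc m3db-data-m3dbnode-0 --type merge \
  -p '{"spec":{"resources":{"requests":{"storage":"200Gi"}}}}'
```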
## Useful Commands
```bash
# Check placement
curl http://localhost:7201/api/v1/services/m3db/placement | jq
# Check namespace readiness
curl http://localhost:7201/api/v1/services/m3db/namespace/ready \
-d '{"name":"default"}'
# Write a test metric via the coordinator's JSON write endpoint
# (remote_write itself expects snappy-compressed protobuf, which plain curl can't produce)
curl -X POST http://localhost:7201/api/v1/json/write -d '{
  "tags": { "__name__": "m3_smoke_test" },
  "timestamp": "'$(date +%s)'",
  "value": 42
}'
# Query via PromQL
curl "http://localhost:7201/api/v1/query?query=up"
# Delete the init job to re-run (if needed)
kubectl -n m3db delete job m3db-cluster-init
kubectl apply -f 06-init-and-pdb.yaml
```

kustomization.yaml

@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- 00-namespace.yaml
- 01-storageclass.yaml
- 02-etcd.yaml
- 03-configmaps.yaml
- 04-m3dbnode.yaml
- 05-m3coordinator.yaml
- 06-init-and-pdb.yaml