init commit

2026-03-31 08:28:16 -04:00
commit ac13c30905
9 changed files with 1028 additions and 0 deletions
--- a/00-namespace.yaml
+++ b/00-namespace.yaml
@@ -0,0 +1,13 @@
+##############################################################################
+# M3DB on Vultr Kubernetes Engine
+# Replaces Mimir for long-term metrics storage
+# Uses Vultr Block Storage CSI for persistent volumes
+##############################################################################
+
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: m3db
+  labels:
+    app.kubernetes.io/name: m3db
+    app.kubernetes.io/part-of: metrics-platform
--- a/01-storageclass.yaml
+++ b/01-storageclass.yaml
@@ -0,0 +1,15 @@
+##############################################################################
+# StorageClass — Vultr Block Storage CSI
+# Uses Vultr's CSI driver (csi.vultr.com) for dynamic provisioning
+##############################################################################
+
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: vultr-block-storage-m3db
+provisioner: block.csi.vultr.com
+parameters:
+  block_type: "high_perf"          # high_perf for SSD-backed NVMe storage
+reclaimPolicy: Retain              # Retain data on PVC deletion (safety)
+allowVolumeExpansion: true         # Allow online volume resizing
+volumeBindingMode: WaitForFirstConsumer
--- a/02-etcd.yaml
+++ b/02-etcd.yaml
@@ -0,0 +1,122 @@
+##############################################################################
+# etcd cluster for M3DB placement & topology
+# M3DB requires etcd for cluster coordination
+##############################################################################
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: etcd
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: etcd
+    app.kubernetes.io/part-of: m3db
+spec:
+  clusterIP: None
+  ports:
+    - name: client
+      port: 2379
+      targetPort: 2379
+    - name: peer
+      port: 2380
+      targetPort: 2380
+  selector:
+    app.kubernetes.io/name: etcd
+
+---
+
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: etcd
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: etcd
+    app.kubernetes.io/part-of: m3db
+spec:
+  serviceName: etcd
+  replicas: 3
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: etcd
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: etcd
+        app.kubernetes.io/part-of: m3db
+    spec:
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app.kubernetes.io/name
+                    operator: In
+                    values:
+                      - etcd
+              topologyKey: kubernetes.io/hostname
+      containers:
+        - name: etcd
+          image: quay.io/coreos/etcd:v3.5.15
+          ports:
+            - containerPort: 2379
+              name: client
+            - containerPort: 2380
+              name: peer
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: CLUSTER_SIZE
+              value: "3"
+          command:
+            - /bin/sh
+            - -ec
+            - |
+              PEERS=""
+              for i in $(seq 0 $((${CLUSTER_SIZE} - 1))); do
+                PEERS="${PEERS}${PEERS:+,}etcd-${i}=http://etcd-${i}.etcd.m3db.svc.cluster.local:2380"
+              done
+
+              exec etcd \
+                --name=${POD_NAME} \
+                --listen-peer-urls=http://0.0.0.0:2380 \
+                --listen-client-urls=http://0.0.0.0:2379 \
+                --advertise-client-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2379 \
+                --initial-advertise-peer-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2380 \
+                --initial-cluster=${PEERS} \
+                --initial-cluster-state=new \
+                --data-dir=/var/lib/etcd/data \
+                --auto-compaction-retention=1
+          volumeMounts:
+            - name: etcd-data
+              mountPath: /var/lib/etcd
+          resources:
+            requests:
+              cpu: 200m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 2379
+            initialDelaySeconds: 15
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 2379
+            initialDelaySeconds: 5
+            periodSeconds: 5
+  volumeClaimTemplates:
+    - metadata:
+        name: etcd-data
+      spec:
+        storageClassName: vultr-block-storage-m3db
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 10Gi
--- a/03-configmaps.yaml
+++ b/03-configmaps.yaml
@@ -0,0 +1,267 @@
+##############################################################################
+# M3DB Configuration
+# Tuned for replacing Mimir — supports Prometheus remote write/read
+##############################################################################
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: m3db-config
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3db
+data:
+  m3dbnode.yml: |
+    coordinator:
+      listenAddress: 0.0.0.0:7201
+      metrics:
+        scope:
+          prefix: coordinator
+        prometheus:
+          handlerPath: /metrics
+        sanitization: prometheus
+        samplingRate: 1.0
+        extended: none
+
+      # Prometheus remote write/read endpoints (Mimir replacement)
+      tagOptions:
+        idScheme: quoted
+
+    db:
+      logging:
+        level: info
+
+      metrics:
+        prometheus:
+          handlerPath: /metrics
+        sanitization: prometheus
+        samplingRate: 1.0
+        extended: detailed
+
+      listenAddress: 0.0.0.0:9000
+      clusterListenAddress: 0.0.0.0:9001
+      httpNodeListenAddress: 0.0.0.0:9002
+      httpClusterListenAddress: 0.0.0.0:9003
+      debugListenAddress: 0.0.0.0:9004
+
+      hostID:
+        resolver: hostname
+
+      client:
+        writeConsistencyLevel: majority
+        readConsistencyLevel: unstrict_majority
+        writeTimeout: 10s
+        fetchTimeout: 15s
+        connectTimeout: 20s
+        writeRetry:
+          initialBackoff: 500ms
+          backoffFactor: 3
+          maxRetries: 2
+          jitter: true
+        fetchRetry:
+          initialBackoff: 500ms
+          backoffFactor: 2
+          maxRetries: 3
+          jitter: true
+
+      # Cluster discovery via etcd
+      discovery:
+        config:
+          service:
+            env: default_env
+            zone: embedded
+            service: m3db
+            cacheDir: /var/lib/m3kv
+            etcdClusters:
+              - zone: embedded
+                endpoints:
+                  - http://etcd-0.etcd.m3db.svc.cluster.local:2379
+                  - http://etcd-1.etcd.m3db.svc.cluster.local:2379
+                  - http://etcd-2.etcd.m3db.svc.cluster.local:2379
+
+      # Cache configuration
+      cache:
+        series:
+          policy: lru
+        postingsList:
+          size: 262144
+
+      # Commit log
+      commitlog:
+        flushMaxBytes: 524288
+        flushEvery: 1s
+        queue:
+          calculationType: fixed
+          size: 2097152
+
+      # Filesystem (data persistence)
+      filesystem:
+        filePathPrefix: /var/lib/m3db
+        writeBufferSize: 65536
+        dataReadBufferSize: 65536
+        infoReadBufferSize: 128
+        seekReadBufferSize: 4096
+        throughputLimitMbps: 1000.0
+        throughputCheckEvery: 128
+
+      # Repair disabled by default — enable once cluster is stable
+      repair:
+        enabled: false
+
+      # Pooling for performance
+      pooling:
+        blockAllocSize: 16
+        type: simple
+        seriesPool:
+          size: 262144
+          lowWatermark: 0.7
+          highWatermark: 1.0
+        blockPool:
+          size: 262144
+          lowWatermark: 0.7
+          highWatermark: 1.0
+        encoderPool:
+          size: 262144
+          lowWatermark: 0.7
+          highWatermark: 1.0
+        segmentReaderPool:
+          size: 16384
+          lowWatermark: 0.2
+          highWatermark: 1.0
+        iteratorPool:
+          size: 2048
+          lowWatermark: 0.2
+          highWatermark: 1.0
+        fetchBlockMetadataResultsPool:
+          size: 65536
+          capacity: 32
+          lowWatermark: 0.01
+          highWatermark: 1.0
+        fetchBlocksMetadataResultsPool:
+          size: 32
+          capacity: 4096
+          lowWatermark: 0.01
+          highWatermark: 1.0
+        bytesPool:
+          buckets:
+            - capacity: 16
+              size: 524288
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 32
+              size: 262144
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 64
+              size: 131072
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 128
+              size: 65536
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 256
+              size: 65536
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 1440
+              size: 16384
+              lowWatermark: 0.01
+              highWatermark: 1.0
+            - capacity: 4096
+              size: 8192
+              lowWatermark: 0.01
+              highWatermark: 1.0
+
+---
+
+##############################################################################
+# M3 Coordinator standalone config
+# Handles Prometheus remote read/write + Grafana queries
+##############################################################################
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: m3coordinator-config
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3coordinator
+data:
+  m3coordinator.yml: |
+    listenAddress: 0.0.0.0:7201
+
+    logging:
+      level: info
+
+    metrics:
+      scope:
+        prefix: coordinator
+      prometheus:
+        handlerPath: /metrics
+      sanitization: prometheus
+      samplingRate: 1.0
+
+    tagOptions:
+      idScheme: quoted
+
+    clusters:
+      - namespaces:
+          - namespace: default
+            type: unaggregated
+            retention: 48h
+          - namespace: agg_10s_30d
+            type: aggregated
+            retention: 720h
+            resolution: 10s
+          - namespace: agg_1m_1y
+            type: aggregated
+            retention: 8760h
+            resolution: 1m
+        client:
+          config:
+            service:
+              env: default_env
+              zone: embedded
+              service: m3db
+              cacheDir: /var/lib/m3kv
+              etcdClusters:
+                - zone: embedded
+                  endpoints:
+                    - http://etcd-0.etcd.m3db.svc.cluster.local:2379
+                    - http://etcd-1.etcd.m3db.svc.cluster.local:2379
+                    - http://etcd-2.etcd.m3db.svc.cluster.local:2379
+          writeConsistencyLevel: majority
+          readConsistencyLevel: unstrict_majority
+
+    # Downsample configuration
+    downsample:
+      rules:
+        mappingRules:
+          - name: "10s for 30 days"
+            filter: "__name__:*"
+            aggregations: ["Last"]
+            storagePolicies:
+              - resolution: 10s
+                retention: 720h
+          - name: "1m for 1 year"
+            filter: "__name__:*"
+            aggregations: ["Last"]
+            storagePolicies:
+              - resolution: 1m
+                retention: 8760h
+
+    # Ingest — Prometheus remote write
+    ingest:
+      ingester:
+        workerPoolSize: 10000
+        opPool:
+          size: 10000
+      m3msg:
+        server:
+          listenAddress: 0.0.0.0:7507
+
+    # Carbon ingestion disabled (uncomment if needed)
+    # carbon:
+    #   ingester:
+    #     listenAddress: "0.0.0.0:7204"
--- a/04-m3dbnode.yaml
+++ b/04-m3dbnode.yaml
@@ -0,0 +1,156 @@
+##############################################################################
+# M3DB Node — Headless Service (for StatefulSet DNS)
+##############################################################################
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: m3dbnode
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3dbnode
+    app.kubernetes.io/part-of: m3db
+spec:
+  clusterIP: None
+  ports:
+    - name: client
+      port: 9000
+      targetPort: 9000
+    - name: cluster
+      port: 9001
+      targetPort: 9001
+    - name: http-node
+      port: 9002
+      targetPort: 9002
+    - name: http-cluster
+      port: 9003
+      targetPort: 9003
+    - name: debug
+      port: 9004
+      targetPort: 9004
+    - name: coordinator
+      port: 7201
+      targetPort: 7201
+  selector:
+    app.kubernetes.io/name: m3dbnode
+
+---
+
+##############################################################################
+# M3DB Node StatefulSet
+# 3 replicas — one per availability zone / node for HA
+##############################################################################
+
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: m3dbnode
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3dbnode
+    app.kubernetes.io/part-of: m3db
+spec:
+  serviceName: m3dbnode
+  replicas: 3
+  podManagementPolicy: Parallel
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: m3dbnode
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: m3dbnode
+        app.kubernetes.io/part-of: m3db
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "7203"
+    spec:
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app.kubernetes.io/name
+                      operator: In
+                      values:
+                        - m3dbnode
+                topologyKey: kubernetes.io/hostname
+      securityContext:
+        fsGroup: 65534
+      terminationGracePeriodSeconds: 120
+      containers:
+        - name: m3dbnode
+          image: quay.io/m3db/m3dbnode:v1.5.0
+          imagePullPolicy: IfNotPresent
+          args:
+            - "-f"
+            - "/etc/m3db/m3dbnode.yml"
+          ports:
+            - containerPort: 9000
+              name: client
+            - containerPort: 9001
+              name: cluster
+            - containerPort: 9002
+              name: http-node
+            - containerPort: 9003
+              name: http-cluster
+            - containerPort: 9004
+              name: debug
+            - containerPort: 7201
+              name: coordinator
+            - containerPort: 7203
+              name: metrics
+          volumeMounts:
+            - name: m3db-data
+              mountPath: /var/lib/m3db
+            - name: m3db-config
+              mountPath: /etc/m3db
+            - name: cache-dir
+              mountPath: /var/lib/m3kv
+          resources:
+            requests:
+              cpu: "1"
+              memory: 4Gi
+            limits:
+              cpu: "2"
+              memory: 8Gi
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 9002
+            initialDelaySeconds: 60
+            periodSeconds: 15
+            timeoutSeconds: 5
+            failureThreshold: 5
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 9002
+            initialDelaySeconds: 30
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3
+          lifecycle:
+            preStop:
+              exec:
+                command:
+                  - /bin/sh
+                  - -c
+                  - "sleep 30"   # allow in-flight writes to drain
+      volumes:
+        - name: m3db-config
+          configMap:
+            name: m3db-config
+        - name: cache-dir
+          emptyDir: {}
+  volumeClaimTemplates:
+    - metadata:
+        name: m3db-data
+      spec:
+        storageClassName: vultr-block-storage-m3db
+        accessModes: ["ReadWriteOnce"]
+        resources:
+          requests:
+            storage: 100Gi         # Adjust based on retention & cardinality
--- a/05-m3coordinator.yaml
+++ b/05-m3coordinator.yaml
@@ -0,0 +1,117 @@
+##############################################################################
+# M3 Coordinator — Deployment
+# Stateless query/write layer — Prometheus remote_write & remote_read target
+# This is what Grafana and Prometheus talk to (replaces Mimir endpoints)
+##############################################################################
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: m3coordinator
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3coordinator
+    app.kubernetes.io/part-of: m3db
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: m3coordinator
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: m3coordinator
+        app.kubernetes.io/part-of: m3db
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "7203"
+    spec:
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app.kubernetes.io/name
+                      operator: In
+                      values:
+                        - m3coordinator
+                topologyKey: kubernetes.io/hostname
+      containers:
+        - name: m3coordinator
+          image: quay.io/m3db/m3coordinator:v1.5.0
+          imagePullPolicy: IfNotPresent
+          args:
+            - "-f"
+            - "/etc/m3coordinator/m3coordinator.yml"
+          ports:
+            - containerPort: 7201
+              name: api
+              protocol: TCP
+            - containerPort: 7203
+              name: metrics
+              protocol: TCP
+          volumeMounts:
+            - name: config
+              mountPath: /etc/m3coordinator
+            - name: cache-dir
+              mountPath: /var/lib/m3kv
+          resources:
+            requests:
+              cpu: 500m
+              memory: 1Gi
+            limits:
+              cpu: "1"
+              memory: 2Gi
+          livenessProbe:
+            httpGet:
+              path: /api/v1/services/m3db/health
+              port: 7201
+            initialDelaySeconds: 15
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /api/v1/services/m3db/health
+              port: 7201
+            initialDelaySeconds: 10
+            periodSeconds: 5
+      volumes:
+        - name: config
+          configMap:
+            name: m3coordinator-config
+        - name: cache-dir
+          emptyDir: {}
+
+---
+
+##############################################################################
+# M3 Coordinator Service
+# Endpoints for Prometheus remote_write / remote_read / Grafana
+#
+# remote_write → http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/write
+# remote_read  → http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/read
+# query (Grafana Prometheus datasource) → http://m3coordinator.m3db.svc.cluster.local:7201
+##############################################################################
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: m3coordinator
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3coordinator
+    app.kubernetes.io/part-of: m3db
+spec:
+  type: ClusterIP
+  ports:
+    - name: api
+      port: 7201
+      targetPort: 7201
+      protocol: TCP
+    - name: metrics
+      port: 7203
+      targetPort: 7203
+      protocol: TCP
+  selector:
+    app.kubernetes.io/name: m3coordinator
--- a/06-init-and-pdb.yaml
+++ b/06-init-and-pdb.yaml
@@ -0,0 +1,216 @@
+##############################################################################
+# PodDisruptionBudgets — keep quorum during rolling updates
+##############################################################################
+
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: m3dbnode-pdb
+  namespace: m3db
+spec:
+  minAvailable: 2
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: m3dbnode
+
+---
+
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: etcd-pdb
+  namespace: m3db
+spec:
+  minAvailable: 2
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: etcd
+
+---
+
+##############################################################################
+# Cluster Init Job
+# Run ONCE after all m3dbnode pods are Ready to:
+#   1. Create the placement (topology)
+#   2. Create the namespaces (retention policies)
+#   3. Wait for the cluster to initialize
+#
+#   kubectl apply -f 06-init-and-pdb.yaml
+#   (then monitor with: kubectl logs -n m3db job/m3db-cluster-init)
+##############################################################################
+
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: m3db-cluster-init
+  namespace: m3db
+  labels:
+    app.kubernetes.io/name: m3db-init
+    app.kubernetes.io/part-of: m3db
+spec:
+  backoffLimit: 5
+  ttlSecondsAfterFinished: 3600
+  template:
+    spec:
+      restartPolicy: OnFailure
+      containers:
+        - name: init
+          image: curlimages/curl:8.7.1
+          command:
+            - /bin/sh
+            - -exc
+            - |
+              COORD="http://m3coordinator.m3db.svc.cluster.local:7201"
+
+              echo "=== Waiting for coordinator to be healthy ==="
+              until curl -sf "${COORD}/api/v1/services/m3db/health"; do
+                echo "Coordinator not ready yet, retrying in 5s..."
+                sleep 5
+              done
+
+              echo ""
+              echo "=== Creating M3DB placement ==="
+              curl -sSf -X POST "${COORD}/api/v1/services/m3db/placement/init" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "num_shards": 64,
+                  "replication_factor": 3,
+                  "instances": [
+                    {
+                      "id": "m3dbnode-0",
+                      "isolation_group": "zone-a",
+                      "zone": "embedded",
+                      "weight": 100,
+                      "endpoint": "m3dbnode-0.m3dbnode.m3db.svc.cluster.local:9000",
+                      "hostname": "m3dbnode-0",
+                      "port": 9000
+                    },
+                    {
+                      "id": "m3dbnode-1",
+                      "isolation_group": "zone-b",
+                      "zone": "embedded",
+                      "weight": 100,
+                      "endpoint": "m3dbnode-1.m3dbnode.m3db.svc.cluster.local:9000",
+                      "hostname": "m3dbnode-1",
+                      "port": 9000
+                    },
+                    {
+                      "id": "m3dbnode-2",
+                      "isolation_group": "zone-c",
+                      "zone": "embedded",
+                      "weight": 100,
+                      "endpoint": "m3dbnode-2.m3dbnode.m3db.svc.cluster.local:9000",
+                      "hostname": "m3dbnode-2",
+                      "port": 9000
+                    }
+                  ]
+                }'
+
+              echo ""
+              echo "=== Creating unaggregated namespace (48h retention) ==="
+              curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "default",
+                  "options": {
+                    "bootstrapEnabled": true,
+                    "flushEnabled": true,
+                    "writesToCommitLog": true,
+                    "cleanupEnabled": true,
+                    "snapshotEnabled": true,
+                    "repairEnabled": false,
+                    "retentionOptions": {
+                      "retentionPeriodDuration": "48h",
+                      "blockSizeDuration": "2h",
+                      "bufferFutureDuration": "10m",
+                      "bufferPastDuration": "10m"
+                    },
+                    "indexOptions": {
+                      "enabled": true,
+                      "blockSizeDuration": "2h"
+                    }
+                  }
+                }'
+
+              echo ""
+              echo "=== Creating aggregated namespace: 10s resolution, 30d retention ==="
+              curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "agg_10s_30d",
+                  "options": {
+                    "bootstrapEnabled": true,
+                    "flushEnabled": true,
+                    "writesToCommitLog": true,
+                    "cleanupEnabled": true,
+                    "snapshotEnabled": true,
+                    "retentionOptions": {
+                      "retentionPeriodDuration": "720h",
+                      "blockSizeDuration": "12h",
+                      "bufferFutureDuration": "10m",
+                      "bufferPastDuration": "10m"
+                    },
+                    "indexOptions": {
+                      "enabled": true,
+                      "blockSizeDuration": "12h"
+                    },
+                    "aggregationOptions": {
+                      "aggregations": [
+                        {
+                          "aggregated": true,
+                          "attributes": {
+                            "resolutionDuration": "10s"
+                          }
+                        }
+                      ]
+                    }
+                  }
+                }'
+
+              echo ""
+              echo "=== Creating aggregated namespace: 1m resolution, 1y retention ==="
+              curl -sSf -X POST "${COORD}/api/v1/services/m3db/namespace" \
+                -H "Content-Type: application/json" \
+                -d '{
+                  "name": "agg_1m_1y",
+                  "options": {
+                    "bootstrapEnabled": true,
+                    "flushEnabled": true,
+                    "writesToCommitLog": true,
+                    "cleanupEnabled": true,
+                    "snapshotEnabled": true,
+                    "retentionOptions": {
+                      "retentionPeriodDuration": "8760h",
+                      "blockSizeDuration": "24h",
+                      "bufferFutureDuration": "10m",
+                      "bufferPastDuration": "10m"
+                    },
+                    "indexOptions": {
+                      "enabled": true,
+                      "blockSizeDuration": "24h"
+                    },
+                    "aggregationOptions": {
+                      "aggregations": [
+                        {
+                          "aggregated": true,
+                          "attributes": {
+                            "resolutionDuration": "1m"
+                          }
+                        }
+                      ]
+                    }
+                  }
+                }'
+
+              echo ""
+              echo "=== Waiting for namespace initialization ==="
+              sleep 10
+              curl -sSf "${COORD}/api/v1/services/m3db/namespace/ready" \
+                -H "Content-Type: application/json" \
+                -d '{ "name": "default" }' || echo "Namespace not ready yet — this is normal, bootstrapping takes a few minutes."
+
+              echo ""
+              echo "=== M3DB cluster initialization complete ==="
+              echo "Prometheus remote_write → ${COORD}/api/v1/prom/remote/write"
+              echo "Prometheus remote_read  → ${COORD}/api/v1/prom/remote/read"
+              echo "PromQL queries          → ${COORD}/api/v1/query"
--- a/README.md
+++ b/README.md
@@ -0,0 +1,111 @@
+# M3DB on Vultr Kubernetes Engine
+
+Drop-in Mimir replacement using M3DB for long-term Prometheus metrics storage, deployed on Vultr VKE with Vultr Block Storage CSI.
+
+## Architecture
+
+```
+Prometheus ──remote_write──▶ M3 Coordinator (Deployment, 2 replicas)
+Grafana   ──PromQL query──▶       │
+                                  │
+                          ┌───────┴───────┐
+                          │   M3DB Nodes  │  (StatefulSet, 3 replicas)
+                          │  Vultr Block  │  (100Gi SSD per node)
+                          │   Storage     │
+                          └───────┬───────┘
+                                  │
+                            etcd cluster   (StatefulSet, 3 replicas)
+```
+
+## Retention Tiers
+
+| Namespace      | Resolution | Retention | Use Case                  |
+|----------------|-----------|-----------|---------------------------|
+| `default`      | raw       | 48h       | Real-time queries         |
+| `agg_10s_30d`  | 10s       | 30 days   | Recent dashboards         |
+| `agg_1m_1y`    | 1m        | 1 year    | Long-term trends/capacity |
+
+## Deployment
+
+```bash
+# 1. Apply everything (except the init job won't succeed until pods are up)
+kubectl apply -k .
+
+# 2. Wait for all pods to be Ready
+kubectl -n m3db get pods -w
+
+# 3. Once all m3dbnode and m3coordinator pods are Running, the init job
+#    will bootstrap the cluster (placement + namespaces).
+#    Monitor it:
+kubectl -n m3db logs -f job/m3db-cluster-init
+
+# 4. Verify cluster health
+kubectl -n m3db port-forward svc/m3coordinator 7201:7201
+curl http://localhost:7201/api/v1/services/m3db/placement
+curl http://localhost:7201/api/v1/services/m3db/namespace
+```
+
+## Prometheus Configuration (Replacing Mimir)
+
+Update your Prometheus config to point at M3 Coordinator instead of Mimir:
+
+```yaml
+# prometheus.yml
+remote_write:
+  - url: "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/write"
+    queue_config:
+      capacity: 10000
+      max_shards: 30
+      max_samples_per_send: 5000
+      batch_send_deadline: 5s
+
+remote_read:
+  - url: "http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/prom/remote/read"
+    read_recent: true
+```
+
+## Grafana Datasource
+
+Add a **Prometheus** datasource in Grafana pointing to:
+
+```
+http://m3coordinator.m3db.svc.cluster.local:7201
+```
+
+All existing PromQL dashboards will work without modification.
+
+## Migration from Mimir
+
+1. **Dual-write phase**: Configure Prometheus to remote_write to both Mimir and M3DB simultaneously.
+2. **Validation**: Compare query results between Mimir and M3DB for the same time ranges.
+3. **Cutover**: Once retention in M3DB covers your needs, remove the Mimir remote_write target.
+4. **Cleanup**: Decommission Mimir components.
+
+## Tuning for Vultr
+
+- **Storage**: The `vultr-block-storage-m3db` StorageClass uses `high_perf` (NVMe SSD). Adjust `storage` in the VolumeClaimTemplates based on your cardinality and retention.
+- **Node sizing**: M3DB is memory-hungry. Recommend at least 8GB RAM nodes on Vultr. The manifest requests 4Gi per m3dbnode pod.
+- **Shards**: The init job creates 64 shards across 3 nodes. For higher cardinality, increase to 128 or 256.
+- **Volume expansion**: The StorageClass has `allowVolumeExpansion: true` — you can resize PVCs online via `kubectl edit pvc`.
+
+## Useful Commands
+
+```bash
+# Check placement
+curl http://localhost:7201/api/v1/services/m3db/placement | jq
+
+# Check namespace readiness
+curl http://localhost:7201/api/v1/services/m3db/namespace/ready \
+  -d '{"name":"default"}'
+
+# Write a test metric
+curl -X POST http://localhost:7201/api/v1/prom/remote/write \
+  -H "Content-Type: application/x-protobuf"
+
+# Query via PromQL
+curl "http://localhost:7201/api/v1/query?query=up"
+
+# Delete the init job to re-run (if needed)
+kubectl -n m3db delete job m3db-cluster-init
+kubectl apply -f 06-init-and-pdb.yaml
+```
--- a/kustomization.yaml
+++ b/kustomization.yaml
@@ -0,0 +1,11 @@
+apiVersion: kustomize.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - 00-namespace.yaml
+  - 01-storageclass.yaml
+  - 02-etcd.yaml
+  - 03-configmaps.yaml
+  - 04-m3dbnode.yaml
+  - 05-m3coordinator.yaml
+  - 06-init-and-pdb.yaml