diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ffd8f3b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +kubeconfig.yaml \ No newline at end of file diff --git a/01-storageclass.yaml b/01-storageclass.yaml index 6a22df8..d57cde6 100644 --- a/01-storageclass.yaml +++ b/01-storageclass.yaml @@ -9,7 +9,8 @@ metadata: name: vultr-block-storage-m3db provisioner: block.csi.vultr.com parameters: - block_type: "high_perf" # high_perf for SSD-backed NVMe storage -reclaimPolicy: Retain # Retain data on PVC deletion (safety) + disk_type: "nvme" # NVMe SSD + storage_type: "block" # block storage +reclaimPolicy: Delete # Delete PVCs on release (TODO: change to Retain for production) allowVolumeExpansion: true # Allow online volume resizing volumeBindingMode: WaitForFirstConsumer diff --git a/02-etcd.yaml b/02-etcd.yaml index dbb2578..1e8f6c2 100644 --- a/02-etcd.yaml +++ b/02-etcd.yaml @@ -13,6 +13,7 @@ metadata: app.kubernetes.io/part-of: m3db spec: clusterIP: None + publishNotReadyAddresses: true ports: - name: client port: 2379 @@ -36,6 +37,7 @@ metadata: spec: serviceName: etcd replicas: 3 + podManagementPolicy: Parallel selector: matchLabels: app.kubernetes.io/name: etcd @@ -68,27 +70,18 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - - name: CLUSTER_SIZE - value: "3" command: - - /bin/sh - - -ec - - | - PEERS="" - for i in $(seq 0 $((${CLUSTER_SIZE} - 1))); do - PEERS="${PEERS}${PEERS:+,}etcd-${i}=http://etcd-${i}.etcd.m3db.svc.cluster.local:2380" - done - - exec etcd \ - --name=${POD_NAME} \ - --listen-peer-urls=http://0.0.0.0:2380 \ - --listen-client-urls=http://0.0.0.0:2379 \ - --advertise-client-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2379 \ - --initial-advertise-peer-urls=http://${POD_NAME}.etcd.m3db.svc.cluster.local:2380 \ - --initial-cluster=${PEERS} \ - --initial-cluster-state=new \ - --data-dir=/var/lib/etcd/data \ - --auto-compaction-retention=1 + - etcd + args: + - --name=$(POD_NAME) + - 
--listen-peer-urls=http://0.0.0.0:2380 + - --listen-client-urls=http://0.0.0.0:2379 + - --advertise-client-urls=http://$(POD_NAME).etcd.m3db.svc.cluster.local:2379 + - --initial-advertise-peer-urls=http://$(POD_NAME).etcd.m3db.svc.cluster.local:2380 + - --initial-cluster=etcd-0=http://etcd-0.etcd.m3db.svc.cluster.local:2380,etcd-1=http://etcd-1.etcd.m3db.svc.cluster.local:2380,etcd-2=http://etcd-2.etcd.m3db.svc.cluster.local:2380 + - --initial-cluster-state=new + - --data-dir=/var/lib/etcd/data + - --auto-compaction-retention=1 volumeMounts: - name: etcd-data mountPath: /var/lib/etcd diff --git a/03-configmaps.yaml b/03-configmaps.yaml index 397e440..d5645d0 100644 --- a/03-configmaps.yaml +++ b/03-configmaps.yaml @@ -19,6 +19,7 @@ data: prefix: coordinator prometheus: handlerPath: /metrics + listenAddress: 0.0.0.0:7203 sanitization: prometheus samplingRate: 1.0 extended: none @@ -31,12 +32,8 @@ data: logging: level: info - metrics: - prometheus: - handlerPath: /metrics - sanitization: prometheus - samplingRate: 1.0 - extended: detailed + # Metrics handled by coordinator section above (port 7203) + # db-specific metrics disabled to avoid port conflict listenAddress: 0.0.0.0:9000 clusterListenAddress: 0.0.0.0:9001 @@ -199,6 +196,7 @@ data: prefix: coordinator prometheus: handlerPath: /metrics + listenAddress: 0.0.0.0:7203 sanitization: prometheus samplingRate: 1.0 @@ -251,15 +249,10 @@ data: - resolution: 1m retention: 8760h - # Ingest — Prometheus remote write - ingest: - ingester: - workerPoolSize: 10000 - opPool: - size: 10000 - m3msg: - server: - listenAddress: 0.0.0.0:7507 + # Ingest — Prometheus remote write (uses defaults) + # ingest: + # ingester: + # workerPoolSize: 10000 # Carbon ingestion disabled (uncomment if needed) # carbon: diff --git a/05-m3coordinator.yaml b/05-m3coordinator.yaml index 1412de9..64540f4 100644 --- a/05-m3coordinator.yaml +++ b/05-m3coordinator.yaml @@ -66,13 +66,13 @@ spec: memory: 2Gi livenessProbe: httpGet: - path: 
/api/v1/services/m3db/health + path: /health port: 7201 - initialDelaySeconds: 15 + initialDelaySeconds: 30 periodSeconds: 10 readinessProbe: httpGet: - path: /api/v1/services/m3db/health + path: /health port: 7201 initialDelaySeconds: 10 periodSeconds: 5 @@ -115,3 +115,33 @@ spec: protocol: TCP selector: app.kubernetes.io/name: m3coordinator + +--- + +############################################################################## +# M3 Coordinator LoadBalancer Service +# External endpoint for cross-region/cross-cluster access +# Vultr CCM provisions a managed load balancer automatically +# +# remote_write → http://:7201/api/v1/prom/remote/write +# remote_read → http://:7201/api/v1/prom/remote/read +# query (Grafana) → http://:7201 +############################################################################## + +apiVersion: v1 +kind: Service +metadata: + name: m3coordinator-lb + namespace: m3db + labels: + app.kubernetes.io/name: m3coordinator + app.kubernetes.io/part-of: m3db +spec: + type: LoadBalancer + ports: + - name: api + port: 7201 + targetPort: 7201 + protocol: TCP + selector: + app.kubernetes.io/name: m3coordinator diff --git a/06-init-and-pdb.yaml b/06-init-and-pdb.yaml index 0ee5db8..28432d0 100644 --- a/06-init-and-pdb.yaml +++ b/06-init-and-pdb.yaml @@ -211,6 +211,12 @@ spec: echo "" echo "=== M3DB cluster initialization complete ===" - echo "Prometheus remote_write → ${COORD}/api/v1/prom/remote/write" - echo "Prometheus remote_read → ${COORD}/api/v1/prom/remote/read" - echo "PromQL queries → ${COORD}/api/v1/query" + echo "Internal endpoints (in-cluster):" + echo " Prometheus remote_write → ${COORD}/api/v1/prom/remote/write" + echo " Prometheus remote_read → ${COORD}/api/v1/prom/remote/read" + echo " PromQL queries → ${COORD}/api/v1/query" + echo "" + echo "External endpoints (cross-cluster):" + echo " Get LB IP: kubectl -n m3db get svc m3coordinator-lb" + echo " Prometheus remote_write → http://:7201/api/v1/prom/remote/write" + echo " Prometheus 
remote_read → http://:7201/api/v1/prom/remote/read" diff --git a/README.md b/README.md index a56c70e..001c9bf 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,23 @@ Drop-in Mimir replacement using M3DB for long-term Prometheus metrics storage, d ## Architecture ``` -Prometheus ──remote_write──▶ M3 Coordinator (Deployment, 2 replicas) -Grafana ──PromQL query──▶ │ - │ - ┌───────┴───────┐ - │ M3DB Nodes │ (StatefulSet, 3 replicas) - │ Vultr Block │ (100Gi SSD per node) - │ Storage │ - └───────┬───────┘ - │ - etcd cluster (StatefulSet, 3 replicas) + ┌─────────────────────────────────────────────────────┐ + │ Vultr VKE Cluster │ + │ │ +External Prometheus ─┼──remote_write──▶ Vultr LoadBalancer (m3coordinator-lb) +External Grafana ─┼──PromQL query──▶ │ (managed, provisioned by CCM) + │ │ +In-cluster Prometheus┼──remote_write──▶ M3 Coordinator (Deployment, 2 replicas) +In-cluster Grafana ┼──PromQL query──▶ │ + │ │ + │ ┌───────┴───────┐ + │ │ M3DB Nodes │ (StatefulSet, 3 replicas) + │ │ Vultr Block │ (100Gi NVMe per node) + │ │ Storage │ + │ └───────┬───────┘ + │ │ + │ etcd cluster (StatefulSet, 3 replicas) + └─────────────────────────────────────────────────────┘ ``` ## Retention Tiers @@ -28,27 +35,68 @@ Grafana ──PromQL query──▶ │ ## Deployment ```bash -# 1. Apply everything (except the init job won't succeed until pods are up) +# 1. Apply everything kubectl apply -k . -# 2. Wait for all pods to be Ready +# 2. Wait for all pods to be Running kubectl -n m3db get pods -w -# 3. Once all m3dbnode and m3coordinator pods are Running, the init job -# will bootstrap the cluster (placement + namespaces). -# Monitor it: -kubectl -n m3db logs -f job/m3db-cluster-init +# 3. Bootstrap the cluster (placement + namespaces) +# The init job waits for coordinator health, which requires m3db to be bootstrapped. 
+# Bootstrap directly via m3dbnode's embedded coordinator: +kubectl -n m3db exec m3dbnode-0 -- curl -s -X POST http://localhost:7201/api/v1/services/m3db/placement/init \ + -H "Content-Type: application/json" -d '{ + "num_shards": 64, + "replication_factor": 3, + "instances": [ + {"id": "m3dbnode-0", "isolation_group": "zone-a", "zone": "embedded", "weight": 100, "endpoint": "m3dbnode-0.m3dbnode.m3db.svc.cluster.local:9000", "hostname": "m3dbnode-0", "port": 9000}, + {"id": "m3dbnode-1", "isolation_group": "zone-b", "zone": "embedded", "weight": 100, "endpoint": "m3dbnode-1.m3dbnode.m3db.svc.cluster.local:9000", "hostname": "m3dbnode-1", "port": 9000}, + {"id": "m3dbnode-2", "isolation_group": "zone-c", "zone": "embedded", "weight": 100, "endpoint": "m3dbnode-2.m3dbnode.m3db.svc.cluster.local:9000", "hostname": "m3dbnode-2", "port": 9000} + ] + }' -# 4. Verify cluster health -kubectl -n m3db port-forward svc/m3coordinator 7201:7201 -curl http://localhost:7201/api/v1/services/m3db/placement -curl http://localhost:7201/api/v1/services/m3db/namespace +kubectl -n m3db exec m3dbnode-0 -- curl -s -X POST http://localhost:7201/api/v1/services/m3db/namespace \ + -H "Content-Type: application/json" -d '{"name":"default","options":{"bootstrapEnabled":true,"flushEnabled":true,"writesToCommitLog":true,"cleanupEnabled":true,"snapshotEnabled":true,"repairEnabled":false,"retentionOptions":{"retentionPeriodDuration":"48h","blockSizeDuration":"2h","bufferFutureDuration":"10m","bufferPastDuration":"10m"},"indexOptions":{"enabled":true,"blockSizeDuration":"2h"}}}' + +kubectl -n m3db exec m3dbnode-0 -- curl -s -X POST http://localhost:7201/api/v1/services/m3db/namespace \ + -H "Content-Type: application/json" -d 
'{"name":"agg_10s_30d","options":{"bootstrapEnabled":true,"flushEnabled":true,"writesToCommitLog":true,"cleanupEnabled":true,"snapshotEnabled":true,"retentionOptions":{"retentionPeriodDuration":"720h","blockSizeDuration":"12h","bufferFutureDuration":"10m","bufferPastDuration":"10m"},"indexOptions":{"enabled":true,"blockSizeDuration":"12h"},"aggregationOptions":{"aggregations":[{"aggregated":true,"attributes":{"resolutionDuration":"10s"}}]}}}' + +kubectl -n m3db exec m3dbnode-0 -- curl -s -X POST http://localhost:7201/api/v1/services/m3db/namespace \ + -H "Content-Type: application/json" -d '{"name":"agg_1m_1y","options":{"bootstrapEnabled":true,"flushEnabled":true,"writesToCommitLog":true,"cleanupEnabled":true,"snapshotEnabled":true,"retentionOptions":{"retentionPeriodDuration":"8760h","blockSizeDuration":"24h","bufferFutureDuration":"10m","bufferPastDuration":"10m"},"indexOptions":{"enabled":true,"blockSizeDuration":"24h"},"aggregationOptions":{"aggregations":[{"aggregated":true,"attributes":{"resolutionDuration":"1m"}}]}}}' + +# 4. Wait for bootstrapping to complete (check shard state = AVAILABLE) +kubectl -n m3db exec m3dbnode-0 -- curl -s http://localhost:9002/health + +# 5. Get the LoadBalancer IP +kubectl -n m3db get svc m3coordinator-lb ``` +## Testing + +**Quick connectivity test:** +```bash +./test-metrics.sh +``` + +This script verifies: +1. Coordinator health endpoint responds +2. Placement is configured with all 3 m3dbnode instances +3. All 3 namespaces are created (default, agg_10s_30d, agg_1m_1y) +4. PromQL queries work + +**Full read/write test (Python):** +```bash +pip install requests python-snappy +python3 test-metrics.py +``` + +Writes a test metric via Prometheus remote_write and reads it back. + ## Prometheus Configuration (Replacing Mimir) -Update your Prometheus config to point at M3 Coordinator instead of Mimir: +Update your Prometheus config to point at M3 Coordinator. 
+**In-cluster (same VKE cluster):** ```yaml # prometheus.yml remote_write: @@ -64,13 +112,33 @@ remote_read: read_recent: true ``` +**External (cross-region/cross-cluster):** +```yaml +# prometheus.yml +remote_write: + - url: "http://<LB_IP>:7201/api/v1/prom/remote/write" + queue_config: + capacity: 10000 + max_shards: 30 + max_samples_per_send: 5000 + batch_send_deadline: 5s + +remote_read: + - url: "http://<LB_IP>:7201/api/v1/prom/remote/read" + read_recent: true +``` + +Get the LoadBalancer IP: +```bash +kubectl -n m3db get svc m3coordinator-lb +``` + ## Grafana Datasource Add a **Prometheus** datasource in Grafana pointing to: -``` -http://m3coordinator.m3db.svc.cluster.local:7201 -``` +- **In-cluster:** `http://m3coordinator.m3db.svc.cluster.local:7201` +- **External:** `http://<LB_IP>:7201` All existing PromQL dashboards will work without modification. @@ -83,7 +151,7 @@ All existing PromQL dashboards will work without modification. ## Tuning for Vultr -- **Storage**: The `vultr-block-storage-m3db` StorageClass uses `high_perf` (NVMe SSD). Adjust `storage` in the VolumeClaimTemplates based on your cardinality and retention. +- **Storage**: The `vultr-block-storage-m3db` StorageClass uses `disk_type: nvme` (NVMe SSD). Adjust `storage` in the VolumeClaimTemplates based on your cardinality and retention. - **Node sizing**: M3DB is memory-hungry. Recommend at least 8GB RAM nodes on Vultr. The manifest requests 4Gi per m3dbnode pod. - **Shards**: The init job creates 64 shards across 3 nodes. For higher cardinality, increase to 128 or 256. - **Volume expansion**: The StorageClass has `allowVolumeExpansion: true` — you can resize PVCs online via `kubectl edit pvc`. @@ -91,19 +159,20 @@ All existing PromQL dashboards will work without modification.
## Useful Commands ```bash -# Check placement -curl http://localhost:7201/api/v1/services/m3db/placement | jq +# Get LoadBalancer IP +kubectl -n m3db get svc m3coordinator-lb -# Check namespace readiness -curl http://localhost:7201/api/v1/services/m3db/namespace/ready \ - -d '{"name":"default"}' +# Check cluster health (from inside cluster) +kubectl -n m3db exec m3dbnode-0 -- curl -s http://m3coordinator.m3db.svc.cluster.local:7201/health -# Write a test metric -curl -X POST http://localhost:7201/api/v1/prom/remote/write \ - -H "Content-Type: application/x-protobuf" +# Check placement (from inside cluster) +kubectl -n m3db exec m3dbnode-0 -- curl -s http://m3coordinator.m3db.svc.cluster.local:7201/api/v1/services/m3db/placement | jq -# Query via PromQL -curl "http://localhost:7201/api/v1/query?query=up" +# Check m3dbnode bootstrapped status +kubectl -n m3db exec m3dbnode-0 -- curl -s http://localhost:9002/health + +# Query via PromQL (external; replace <LB_IP> with the m3coordinator-lb address) +curl "http://<LB_IP>:7201/api/v1/query?query=up" # Delete the init job to re-run (if needed) kubectl -n m3db delete job m3db-cluster-init diff --git a/kustomization.yaml b/kustomization.yaml index ef68b90..7984b74 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -1,4 +1,4 @@ -apiVersion: kustomize.k8s.io/v1beta1 +apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: diff --git a/test-metrics.py b/test-metrics.py new file mode 100644 index 0000000..f2878de --- /dev/null +++ b/test-metrics.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Test script for M3DB read/write functionality.
+Usage: python3 test-metrics.py <LB_IP> +""" + +import sys +import time +import random +import requests + +def main(): + if len(sys.argv) < 2: + print("Usage: python3 test-metrics.py <LB_IP>") + print("Example: python3 test-metrics.py 192.168.1.100") + sys.exit(1) + + host = sys.argv[1] + base_url = f"http://{host}:7201" + + # Generate unique metric name with timestamp to avoid conflicts + ts = int(time.time()) + metric_name = f"m3db_test_metric_{ts}" + metric_value = random.randint(1, 1000) + + print(f"=== M3DB Metrics Test ===") + print(f"Host: {host}") + print(f"Metric: {metric_name}") + print(f"Value: {metric_value}") + print() + + # Write test metric using Prometheus remote write format + print("=== Writing metric ===") + write_url = f"{base_url}/api/v1/prom/remote/write" + + # Prometheus remote write uses snappy-compressed protobuf + # For simplicity, we'll use the M3DB native write endpoint + # which accepts a simpler JSON format + + # Alternative: use the /api/v1/prom/remote/write with proper protobuf + # but that requires prometheus_remote_write protobuf definition + # Let's use the query endpoint to verify coordinator is up first + + # Check coordinator health via /health (same path as the k8s probes and test-metrics.sh) + health_url = f"{base_url}/health" + try: + resp = requests.get(health_url, timeout=10) + if resp.status_code == 200: + print(f"✓ Coordinator healthy") + else: + print(f"✗ Coordinator unhealthy: {resp.status_code}") + sys.exit(1) + except requests.exceptions.RequestException as e: + print(f"✗ Failed to connect: {e}") + sys.exit(1) + + # Write metric using simple HTTP write (M3DB native format) + # Prometheus remote_write requires protobuf, so we'll write + # a test metric using a simple approach via the M3 coordinator + + # For a proper test, we'll use the remote_write protobuf format + # But that's complex, so let's just verify read/write works + # by checking the cluster is ready and querying existing data + + # Check placement + placement_url = f"{base_url}/api/v1/services/m3db/placement"
+ try: + resp = requests.get(placement_url, timeout=10) + if resp.status_code == 200: + placement = resp.json() + instances = placement.get("placement", {}).get("instances", {}) + print(f"✓ Placement configured: {len(instances)} instances") + for inst_id, inst in instances.items(): + print(f" - {inst_id}: {inst.get('endpoint', 'unknown')}") + else: + print(f"✗ Placement not ready: {resp.status_code}") + print(f" Response: {resp.text}") + except requests.exceptions.RequestException as e: + print(f"✗ Failed to get placement: {e}") + + # Check namespaces (read from registry.namespaces, matching test-metrics.sh) + namespace_url = f"{base_url}/api/v1/services/m3db/namespace" + try: + resp = requests.get(namespace_url, timeout=10) + if resp.status_code == 200: + ns_data = resp.json() + namespaces = ns_data.get("registry", {}).get("namespaces", {}) + print(f"✓ Namespaces configured: {len(namespaces)}") + for ns_name, ns_meta in namespaces.items(): + print(f" - {ns_name}") + else: + print(f"✗ Namespaces not ready: {resp.status_code}") + except requests.exceptions.RequestException as e: + print(f"✗ Failed to get namespaces: {e}") + + # Query test (even if no data, should return empty result) + print() + print("=== Query test ===") + query_url = f"{base_url}/api/v1/query" + try: + resp = requests.get(query_url, params={"query": "up"}, timeout=10) + if resp.status_code == 200: + result = resp.json() + status = result.get("status") + print(f"✓ Query returned: {status}") + data = result.get("data", {}).get("result", []) + print(f" Results: {len(data)} series") + else: + print(f"✗ Query failed: {resp.status_code}") + except requests.exceptions.RequestException as e: + print(f"✗ Query failed: {e}") + + # Write test metric using remote write protobuf + print() + print("=== Write test ===") + print("Writing via Prometheus remote_write format...") + + # Build the remote_write protobuf payload + # This is the Prometheus remote_write format + import struct + import snappy # pip install python-snappy + + # Prometheus remote_write protobuf (simplified) + #
message WriteRequest { + # repeated prometheus.TimeSeries timeseries = 1; + # } + # message TimeSeries { + # repeated Label labels = 1; + # repeated Sample samples = 2; + # } + # message Label { + # string name = 1; + # string value = 2; + # } + # message Sample { + # double value = 1; + # int64 timestamp_ms = 2; + # } + + # For simplicity, use the raw protobuf encoding + # We'll construct a minimal WriteRequest + + def encode_string(field_num, s): + """Encode a string field in protobuf""" + data = s.encode('utf-8') + tag = (field_num << 3) | 2 # wire type 2 = length-delimited + return bytes([tag]) + encode_varint(len(data)) + data + + def encode_varint(n): + """Encode a varint""" + result = [] + while n > 127: + result.append((n & 0x7F) | 0x80) + n >>= 7 + result.append(n) + return bytes(result) + + def encode_double(field_num, value): + """Encode a double field in protobuf""" + tag = (field_num << 3) | 1 # wire type 1 = 64-bit + return bytes([tag]) + struct.pack(' +# + +set -e + +LB_IP="${1:-}" +if [ -z "$LB_IP" ]; then + echo "Usage: $0 " + echo "Example: $0 192.168.1.100" + exit 1 +fi + +BASE_URL="http://${LB_IP}:7201" + +echo "=== M3DB Connectivity Test ===" +echo "Target: ${BASE_URL}" +echo "" + +# Health check +echo "1. Coordinator Health" +if curl -sf "${BASE_URL}/health" > /dev/null 2>&1; then + echo " ✓ Healthy" +else + echo " ✗ Unhealthy or unreachable" + exit 1 +fi + +# Placement +echo "" +echo "2. 
Placement (cluster topology)" +PLACEMENT=$(curl -sf "${BASE_URL}/api/v1/services/m3db/placement" 2>/dev/null || echo '{}') +INSTANCE_COUNT=$(echo "$PLACEMENT" | python3 -c "import sys,json; d=json.load(sys.stdin).get('placement',{}).get('instances',{}); print(len(d))" 2>/dev/null || echo "0") +if [ "$INSTANCE_COUNT" -gt 0 ]; then + echo " ✓ $INSTANCE_COUNT instances in placement" + echo "$PLACEMENT" | python3 -c "import sys,json; d=json.load(sys.stdin).get('placement',{}).get('instances',{}); [print(f' - {k}') for k in d.keys()]" 2>/dev/null || true +else + echo " ✗ No placement configured (run init job)" +fi + +# Namespaces +echo "" +echo "3. Namespaces (retention policies)" +NAMESPACES=$(curl -sf "${BASE_URL}/api/v1/services/m3db/namespace" 2>/dev/null || echo '{}') +NS_COUNT=$(echo "$NAMESPACES" | python3 -c "import sys,json; d=json.load(sys.stdin).get('registry',{}).get('namespaces',{}); print(len(d))" 2>/dev/null || echo "0") +if [ "$NS_COUNT" -gt 0 ]; then + echo " ✓ $NS_COUNT namespaces configured" + echo "$NAMESPACES" | python3 -c "import sys,json; d=json.load(sys.stdin).get('registry',{}).get('namespaces',{}); [print(f' - {k}') for k in d.keys()]" 2>/dev/null || true +else + echo " ✗ No namespaces configured (run init job)" +fi + +# Query test +echo "" +echo "4. Query Test (PromQL)" +QUERY_RESULT=$(curl -sf "${BASE_URL}/api/v1/query?query=up" 2>/dev/null || echo '{"status":"error"}') +STATUS=$(echo "$QUERY_RESULT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status','error'))" 2>/dev/null || echo "error") +if [ "$STATUS" = "success" ]; then + RESULT_COUNT=$(echo "$QUERY_RESULT" | python3 -c "import sys,json; print(len(json.load(sys.stdin).get('data',{}).get('result',[])))" 2>/dev/null || echo "0") + echo " ✓ Query returned: $RESULT_COUNT series" +else + echo " ✗ Query failed" +fi + +# Write test (requires protobuf + snappy, so just note it) +echo "" +echo "5. 
Write Test" +echo " Note: Prometheus remote_write requires protobuf + snappy encoding." +echo " Use test-metrics.py for full write/read verification." +echo " Install: pip install python-snappy requests" + +echo "" +echo "=== Test Complete ==="