From 7132fc40173658bf6f3a19bb37385c6a1d8ef534 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 20 Jan 2026 11:15:44 -0500 Subject: [PATCH] binary safe encoding --- concat_project.sh | 6 +- src/dynamodb/handler.zig | 8 +- src/dynamodb/json.zig | 274 ------------------------------- src/dynamodb/storage.zig | 295 ++++++++++----------------------- src/dynamodb/types.zig | 6 - src/http.zig | 23 --- src/key_codec.zig | 344 +++++++++++++++++++++++++++++++++++++++ src/rocksdb.zig | 75 --------- 8 files changed, 434 insertions(+), 597 deletions(-) create mode 100644 src/key_codec.zig diff --git a/concat_project.sh b/concat_project.sh index 36839c5..b57d0de 100755 --- a/concat_project.sh +++ b/concat_project.sh @@ -4,13 +4,13 @@ OUTPUT_FILE="project_context.txt" # Directories to exclude -EXCLUDE_DIRS=("zig-out" "data" ".git" "node_modules" ".zig-cache") +EXCLUDE_DIRS=("zig-out" "data" ".git" "node_modules" ".zig-cache" "tests") # File extensions to include (add more as needed) -INCLUDE_EXTENSIONS=("zig" "md" "yml" "yaml" "Makefile" "Dockerfile") +INCLUDE_EXTENSIONS=("zig" "Makefile") # Special files to include (without extension) -INCLUDE_FILES=("build.zig" "build.zig.zon" "Makefile" "Dockerfile" "docker-compose.yml" "README.md") +INCLUDE_FILES=("build.zig" "build.zig.zon" "Makefile") # Clear the output file > "$OUTPUT_FILE" diff --git a/src/dynamodb/handler.zig b/src/dynamodb/handler.zig index 8b1253b..07238b9 100644 --- a/src/dynamodb/handler.zig +++ b/src/dynamodb/handler.zig @@ -226,8 +226,8 @@ pub const ApiHandler = struct { storage.StorageError.MissingKeyAttribute => { _ = self.errorResponse(response, .ValidationException, "Item missing required key attribute"); }, - storage.StorageError.InvalidKey, storage.StorageError.KeyValueContainsSeparator => { - _ = self.errorResponse(response, .ValidationException, "Invalid key format or key contains ':' character (limitation will be removed in Phase 2)"); + storage.StorageError.InvalidKey => { + _ = 
self.errorResponse(response, .ValidationException, "Invalid key format"); }, else => { _ = self.errorResponse(response, .InternalServerError, "Failed to put item"); @@ -266,7 +266,7 @@ pub const ApiHandler = struct { storage.StorageError.MissingKeyAttribute => { _ = self.errorResponse(response, .ValidationException, "Key missing required attributes"); }, - storage.StorageError.InvalidKey, storage.StorageError.KeyValueContainsSeparator => { + storage.StorageError.InvalidKey => { _ = self.errorResponse(response, .ValidationException, "Invalid key format"); }, else => { @@ -321,7 +321,7 @@ pub const ApiHandler = struct { storage.StorageError.MissingKeyAttribute => { _ = self.errorResponse(response, .ValidationException, "Key missing required attributes"); }, - storage.StorageError.InvalidKey, storage.StorageError.KeyValueContainsSeparator => { + storage.StorageError.InvalidKey => { _ = self.errorResponse(response, .ValidationException, "Invalid key format"); }, else => { diff --git a/src/dynamodb/json.zig b/src/dynamodb/json.zig index cd91006..ffc3421 100644 --- a/src/dynamodb/json.zig +++ b/src/dynamodb/json.zig @@ -490,77 +490,6 @@ fn deepCopyAttributeValue(allocator: std.mem.Allocator, attr: types.AttributeVal }; } -/// Validate that a key attribute value doesn't contain the separator -/// PHASE 2 TODO: Remove this validation once we implement proper binary key encoding -/// with length-prefixed segments. Binary encoding will eliminate separator collision issues. 
-fn validateKeyValue(value: []const u8) !void { - if (std.mem.indexOf(u8, value, ":")) |_| { - return error.KeyValueContainsSeparator; - } -} - -/// Build a RocksDB storage key from table name and key attributes -/// Format: _data:{table}:{pk} or _data:{table}:{pk}:{sk} -/// -/// PHASE 2 TODO: Replace this textual key format with binary encoding: -/// - Use length-prefixed segments: 0x01 | "data" | len(table) | table | len(pk) | pk | [len(sk) | sk] -/// - This prevents separator collision and makes prefix scans reliable -/// - Current limitation: key values cannot contain ':' character -/// -/// Caller owns returned slice and must free it -pub fn buildRocksDBKey( - allocator: std.mem.Allocator, - table_name: []const u8, - key_schema: []const types.KeySchemaElement, - key: types.Item, -) ![]u8 { - const KeyPrefix = struct { - const data = "_data:"; - }; - - // Find partition key and sort key - var pk_value: ?[]const u8 = null; - var sk_value: ?[]const u8 = null; - - for (key_schema) |schema_element| { - const attr = key.get(schema_element.attribute_name) orelse - return error.MissingKeyAttribute; - - // Extract string value from attribute - // DynamoDB keys must be S (string), N (number), or B (binary) - const value = switch (attr) { - .S => |s| s, - .N => |n| n, - .B => |b| b, - else => return error.InvalidKeyType, - }; - - // PHASE 2 TODO: Remove this validation - binary encoding handles all values - try validateKeyValue(value); - - switch (schema_element.key_type) { - .HASH => pk_value = value, - .RANGE => sk_value = value, - } - } - - const pk = pk_value orelse return error.MissingPartitionKey; - - if (sk_value) |sk| { - return std.fmt.allocPrint( - allocator, - "{s}{s}:{s}:{s}", - .{ KeyPrefix.data, table_name, pk, sk }, - ); - } else { - return std.fmt.allocPrint( - allocator, - "{s}{s}:{s}", - .{ KeyPrefix.data, table_name, pk }, - ); - } -} - // ============================================================================ // Memory Management // 
============================================================================ @@ -601,206 +530,3 @@ pub fn deinitItem(item: *types.Item, allocator: std.mem.Allocator) void { } item.deinit(); } - -// ============================================================================ -// Tests -// ============================================================================ - -test "parse simple string attribute" { - const allocator = std.testing.allocator; - - const json_str = "{\"S\":\"hello world\"}"; - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_str, .{}); - defer parsed.deinit(); - - var attr = try parseAttributeValue(allocator, parsed.value); - defer deinitAttributeValue(&attr, allocator); - - try std.testing.expectEqualStrings("hello world", attr.S); -} - -test "parse simple item" { - const allocator = std.testing.allocator; - - const json_str = - \\{"pk":{"S":"user123"},"name":{"S":"Alice"},"age":{"N":"25"}} - ; - - var item = try parseItem(allocator, json_str); - defer deinitItem(&item, allocator); - - try std.testing.expectEqual(@as(usize, 3), item.count()); - - const pk = item.get("pk").?; - try std.testing.expectEqualStrings("user123", pk.S); - - const name = item.get("name").?; - try std.testing.expectEqualStrings("Alice", name.S); - - const age = item.get("age").?; - try std.testing.expectEqualStrings("25", age.N); -} - -test "parseItemFromValue" { - const allocator = std.testing.allocator; - - const json_str = "{\"pk\":{\"S\":\"test\"},\"data\":{\"N\":\"42\"}}"; - const parsed = try std.json.parseFromSlice(std.json.Value, allocator, json_str, .{}); - defer parsed.deinit(); - - var item = try parseItemFromValue(allocator, parsed.value); - defer deinitItem(&item, allocator); - - try std.testing.expectEqual(@as(usize, 2), item.count()); -} - -test "parse nested map" { - const allocator = std.testing.allocator; - - const json_str = - \\{"data":{"M":{"key1":{"S":"value1"},"key2":{"N":"42"}}}} - ; - - var item = try parseItem(allocator, 
json_str); - defer deinitItem(&item, allocator); - - const data = item.get("data").?; - const inner = data.M.get("key1").?; - try std.testing.expectEqualStrings("value1", inner.S); -} - -test "serialize item with deterministic ordering" { - const allocator = std.testing.allocator; - - const original = - \\{"pk":{"S":"test"},"num":{"N":"123"},"data":{"S":"value"}} - ; - - var item = try parseItem(allocator, original); - defer deinitItem(&item, allocator); - - const serialized = try serializeItem(allocator, item); - defer allocator.free(serialized); - - // Keys should be alphabetically sorted: data, num, pk - const expected = "{\"data\":{\"S\":\"value\"},\"num\":{\"N\":\"123\"},\"pk\":{\"S\":\"test\"}}"; - try std.testing.expectEqualStrings(expected, serialized); -} - -test "serialize nested map with deterministic ordering" { - const allocator = std.testing.allocator; - - const original = - \\{"outer":{"M":{"z":{"S":"last"},"a":{"S":"first"},"m":{"S":"middle"}}}} - ; - - var item = try parseItem(allocator, original); - defer deinitItem(&item, allocator); - - const serialized = try serializeItem(allocator, item); - defer allocator.free(serialized); - - // Inner map keys should also be sorted: a, m, z - const expected = "{\"outer\":{\"M\":{\"a\":{\"S\":\"first\"},\"m\":{\"S\":\"middle\"},\"z\":{\"S\":\"last\"}}}}"; - try std.testing.expectEqualStrings(expected, serialized); -} - -test "build rocksdb key with partition key only" { - const allocator = std.testing.allocator; - - const item_json = "{\"pk\":{\"S\":\"user123\"},\"data\":{\"S\":\"test\"}}"; - var item = try parseItem(allocator, item_json); - defer deinitItem(&item, allocator); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - - const key = try buildRocksDBKey(allocator, "Users", &key_schema, item); - defer allocator.free(key); - - try std.testing.expectEqualStrings("_data:Users:user123", key); -} - -test "build rocksdb key with partition and sort keys" 
{ - const allocator = std.testing.allocator; - - const item_json = "{\"pk\":{\"S\":\"user123\"},\"sk\":{\"S\":\"metadata\"}}"; - var item = try parseItem(allocator, item_json); - defer deinitItem(&item, allocator); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - .{ .attribute_name = "sk", .key_type = .RANGE }, - }; - - const key = try buildRocksDBKey(allocator, "Items", &key_schema, item); - defer allocator.free(key); - - try std.testing.expectEqualStrings("_data:Items:user123:metadata", key); -} - -test "reject key with separator" { - const allocator = std.testing.allocator; - - const item_json = "{\"pk\":{\"S\":\"user:123\"},\"data\":{\"S\":\"test\"}}"; - var item = try parseItem(allocator, item_json); - defer deinitItem(&item, allocator); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - - const result = buildRocksDBKey(allocator, "Users", &key_schema, item); - try std.testing.expectError(error.KeyValueContainsSeparator, result); -} - -test "parseTableName from request" { - const allocator = std.testing.allocator; - - const request = "{\"TableName\":\"Users\",\"Item\":{}}"; - const table_name = try parseTableName(allocator, request); - - try std.testing.expectEqualStrings("Users", table_name); -} - -test "parseItemFromRequest" { - const allocator = std.testing.allocator; - - const request = "{\"TableName\":\"Users\",\"Item\":{\"pk\":{\"S\":\"test\"}}}"; - var item = try parseItemFromRequest(allocator, request); - defer deinitItem(&item, allocator); - - try std.testing.expectEqual(@as(usize, 1), item.count()); - const pk = item.get("pk").?; - try std.testing.expectEqualStrings("test", pk.S); -} - -test "parseKeyFromRequest" { - const allocator = std.testing.allocator; - - const request = "{\"TableName\":\"Users\",\"Key\":{\"pk\":{\"S\":\"user123\"}}}"; - var key = try parseKeyFromRequest(allocator, request); - defer deinitItem(&key, allocator); - - try 
std.testing.expectEqual(@as(usize, 1), key.count()); -} - -test "extractKeyAttributes deep copies" { - const allocator = std.testing.allocator; - - const item_json = "{\"pk\":{\"S\":\"user123\"},\"name\":{\"S\":\"Alice\"},\"age\":{\"N\":\"25\"}}"; - var item = try parseItem(allocator, item_json); - defer deinitItem(&item, allocator); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - - var extracted = try extractKeyAttributes(allocator, item, &key_schema); - defer deinitItem(&extracted, allocator); - - try std.testing.expectEqual(@as(usize, 1), extracted.count()); - const pk = extracted.get("pk").?; - try std.testing.expectEqualStrings("user123", pk.S); -} diff --git a/src/dynamodb/storage.zig b/src/dynamodb/storage.zig index 14d31dc..0a9767d 100644 --- a/src/dynamodb/storage.zig +++ b/src/dynamodb/storage.zig @@ -3,6 +3,7 @@ const std = @import("std"); const rocksdb = @import("../rocksdb.zig"); const types = @import("types.zig"); const json = @import("json.zig"); +const key_codec = @import("../key_codec.zig"); pub const StorageError = error{ TableNotFound, @@ -10,25 +11,11 @@ pub const StorageError = error{ ItemNotFound, InvalidKey, MissingKeyAttribute, - KeyValueContainsSeparator, SerializationError, RocksDBError, OutOfMemory, }; -/// Key prefixes for different data types in RocksDB -/// PHASE 2 TODO: Replace textual prefixes with binary encoding using length-prefixed segments -const KeyPrefix = struct { - /// Table metadata: _meta:{table_name} - const meta = "_meta:"; - /// Item data: _data:{table_name}:{partition_key}[:{sort_key}] - const data = "_data:"; - /// Global secondary index: _gsi:{table_name}:{index_name}:{pk}:{sk} - const gsi = "_gsi:"; - /// Local secondary index: _lsi:{table_name}:{index_name}:{pk}:{sk} - const lsi = "_lsi:"; -}; - /// In-memory representation of table metadata const TableMetadata = struct { table_name: []const u8, @@ -77,7 +64,7 @@ pub const StorageEngine = struct { 
attribute_definitions: []const types.AttributeDefinition, ) StorageError!types.TableDescription { // Check if table already exists - const meta_key = try self.buildMetaKey(table_name); + const meta_key = try key_codec.buildMetaKey(self.allocator, table_name); defer self.allocator.free(meta_key); const existing = self.db.get(self.allocator, meta_key) catch return StorageError.RocksDBError; @@ -115,7 +102,7 @@ pub const StorageEngine = struct { } pub fn deleteTable(self: *Self, table_name: []const u8) StorageError!void { - const meta_key = try self.buildMetaKey(table_name); + const meta_key = try key_codec.buildMetaKey(self.allocator, table_name); defer self.allocator.free(meta_key); // Verify table exists @@ -124,7 +111,7 @@ pub const StorageEngine = struct { self.allocator.free(existing.?); // Delete all items with this table's prefix - const data_prefix = try self.buildDataPrefix(table_name); + const data_prefix = try key_codec.buildTablePrefix(self.allocator, table_name); defer self.allocator.free(data_prefix); var batch = rocksdb.WriteBatch.init() orelse return StorageError.RocksDBError; @@ -153,7 +140,7 @@ pub const StorageEngine = struct { defer metadata.deinit(self.allocator); // Count items (expensive, but matches DynamoDB behavior) - const data_prefix = try self.buildDataPrefix(table_name); + const data_prefix = try key_codec.buildTablePrefix(self.allocator, table_name); defer self.allocator.free(data_prefix); var item_count: u64 = 0; @@ -195,12 +182,21 @@ pub const StorageEngine = struct { var iter = rocksdb.Iterator.init(&self.db) orelse return StorageError.RocksDBError; defer iter.deinit(); - iter.seek(KeyPrefix.meta); + // Seek to metadata entity type + const meta_prefix = [_]u8{key_codec.EntityType.meta.toByte()}; + iter.seek(&meta_prefix); + while (iter.valid()) { const key = iter.key() orelse break; - if (!std.mem.startsWith(u8, key, KeyPrefix.meta)) break; - const table_name = key[KeyPrefix.meta.len..]; + // Check if still in metadata namespace + if 
(key.len == 0 or key[0] != key_codec.EntityType.meta.toByte()) break; + + // Decode key to extract table name + var decoder = key_codec.KeyDecoder.init(key); + _ = decoder.readEntityType() catch break; + const table_name = decoder.readSegmentBorrowed() catch break; + const owned_name = self.allocator.dupe(u8, table_name) catch return StorageError.OutOfMemory; tables.append(owned_name) catch return StorageError.OutOfMemory; @@ -219,25 +215,20 @@ pub const StorageEngine = struct { var metadata = try self.getTableMetadata(table_name); defer metadata.deinit(self.allocator); - // Validate that item contains all required key attributes - for (metadata.key_schema) |key_elem| { - if (!item.contains(key_elem.attribute_name)) { - return StorageError.MissingKeyAttribute; - } + // Extract key values from item + const key_values = try self.extractKeyValues(item, metadata.key_schema); + defer { + self.allocator.free(key_values.pk); + if (key_values.sk) |sk| self.allocator.free(sk); } - // Build storage key using the item and key schema - const storage_key = json.buildRocksDBKey( + // Build storage key using binary encoding + const storage_key = try key_codec.buildDataKey( self.allocator, table_name, - metadata.key_schema, - item, - ) catch |err| { - return switch (err) { - error.KeyValueContainsSeparator => StorageError.KeyValueContainsSeparator, - else => StorageError.InvalidKey, - }; - }; + key_values.pk, + key_values.sk, + ); defer self.allocator.free(storage_key); // Serialize item to canonical JSON for storage @@ -255,25 +246,20 @@ pub const StorageEngine = struct { var metadata = try self.getTableMetadata(table_name); defer metadata.deinit(self.allocator); - // Validate key has all required attributes - for (metadata.key_schema) |key_elem| { - if (!key.contains(key_elem.attribute_name)) { - return StorageError.MissingKeyAttribute; - } + // Extract key values + const key_values = try self.extractKeyValues(key, metadata.key_schema); + defer { + 
self.allocator.free(key_values.pk); + if (key_values.sk) |sk| self.allocator.free(sk); } // Build storage key - const storage_key = json.buildRocksDBKey( + const storage_key = try key_codec.buildDataKey( self.allocator, table_name, - metadata.key_schema, - key, - ) catch |err| { - return switch (err) { - error.KeyValueContainsSeparator => StorageError.KeyValueContainsSeparator, - else => StorageError.InvalidKey, - }; - }; + key_values.pk, + key_values.sk, + ); defer self.allocator.free(storage_key); const item_json = self.db.get(self.allocator, storage_key) catch return StorageError.RocksDBError; @@ -289,25 +275,20 @@ pub const StorageEngine = struct { var metadata = try self.getTableMetadata(table_name); defer metadata.deinit(self.allocator); - // Validate key - for (metadata.key_schema) |key_elem| { - if (!key.contains(key_elem.attribute_name)) { - return StorageError.MissingKeyAttribute; - } + // Extract key values + const key_values = try self.extractKeyValues(key, metadata.key_schema); + defer { + self.allocator.free(key_values.pk); + if (key_values.sk) |sk| self.allocator.free(sk); } // Build storage key - const storage_key = json.buildRocksDBKey( + const storage_key = try key_codec.buildDataKey( self.allocator, table_name, - metadata.key_schema, - key, - ) catch |err| { - return switch (err) { - error.KeyValueContainsSeparator => StorageError.KeyValueContainsSeparator, - else => StorageError.InvalidKey, - }; - }; + key_values.pk, + key_values.sk, + ); defer self.allocator.free(storage_key); self.db.delete(storage_key) catch return StorageError.RocksDBError; @@ -319,7 +300,7 @@ pub const StorageEngine = struct { var metadata = try self.getTableMetadata(table_name); defer metadata.deinit(self.allocator); - const data_prefix = try self.buildDataPrefix(table_name); + const data_prefix = try key_codec.buildTablePrefix(self.allocator, table_name); defer self.allocator.free(data_prefix); var items = std.ArrayList(types.Item).init(self.allocator); @@ -361,8 +342,8 
@@ pub const StorageEngine = struct { var metadata = try self.getTableMetadata(table_name); defer metadata.deinit(self.allocator); - // Build prefix for this partition - const prefix = try self.buildPartitionPrefix(table_name, partition_key_value); + // Build prefix for this partition using binary encoding + const prefix = try key_codec.buildPartitionPrefix(self.allocator, table_name, partition_key_value); defer self.allocator.free(prefix); var items = std.ArrayList(types.Item).init(self.allocator); @@ -401,7 +382,7 @@ pub const StorageEngine = struct { // === Internal Helpers === fn getTableMetadata(self: *Self, table_name: []const u8) StorageError!TableMetadata { - const meta_key = try self.buildMetaKey(table_name); + const meta_key = try key_codec.buildMetaKey(self.allocator, table_name); defer self.allocator.free(meta_key); const meta_value = self.db.get(self.allocator, meta_key) catch return StorageError.RocksDBError; @@ -411,16 +392,42 @@ pub const StorageEngine = struct { return self.deserializeTableMetadata(meta_value.?); } - fn buildMetaKey(self: *Self, table_name: []const u8) StorageError![]u8 { - return std.fmt.allocPrint(self.allocator, "{s}{s}", .{ KeyPrefix.meta, table_name }) catch return StorageError.OutOfMemory; - } + const KeyValues = struct { + pk: []u8, + sk: ?[]u8, + }; - fn buildDataPrefix(self: *Self, table_name: []const u8) StorageError![]u8 { - return std.fmt.allocPrint(self.allocator, "{s}{s}:", .{ KeyPrefix.data, table_name }) catch return StorageError.OutOfMemory; - } + /// Extract partition key and sort key values from an item + /// Caller must free both pk and sk (if present) + fn extractKeyValues(self: *Self, item: types.Item, key_schema: []const types.KeySchemaElement) StorageError!KeyValues { + var pk: ?[]u8 = null; + var sk: ?[]u8 = null; - fn buildPartitionPrefix(self: *Self, table_name: []const u8, partition_key: []const u8) StorageError![]u8 { - return std.fmt.allocPrint(self.allocator, "{s}{s}:{s}", .{ KeyPrefix.data, 
table_name, partition_key }) catch return StorageError.OutOfMemory; + for (key_schema) |schema_element| { + const attr = item.get(schema_element.attribute_name) orelse + return StorageError.MissingKeyAttribute; + + // Extract string value from attribute + // DynamoDB keys must be S (string), N (number), or B (binary) + const value = switch (attr) { + .S => |s| s, + .N => |n| n, + .B => |b| b, + else => return StorageError.InvalidKey, + }; + + const owned_value = try self.allocator.dupe(u8, value); + + switch (schema_element.key_type) { + .HASH => pk = owned_value, + .RANGE => sk = owned_value, + } + } + + return KeyValues{ + .pk = pk orelse return StorageError.MissingKeyAttribute, + .sk = sk, + }; } // === Serialization === @@ -590,139 +597,3 @@ pub const StorageEngine = struct { }; } }; - -test "storage basic operations" { - const allocator = std.testing.allocator; - - const path = "/tmp/test_storage"; - defer std.fs.deleteTreeAbsolute(path) catch {}; - - var engine = try StorageEngine.init(allocator, path); - defer engine.deinit(); - - // Create table - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - const attr_defs = [_]types.AttributeDefinition{ - .{ .attribute_name = "pk", .attribute_type = .S }, - }; - - _ = try engine.createTable("TestTable", &key_schema, &attr_defs); - - // List tables - const tables = try engine.listTables(); - defer { - for (tables) |t| allocator.free(t); - allocator.free(tables); - } - try std.testing.expectEqual(@as(usize, 1), tables.len); - try std.testing.expectEqualStrings("TestTable", tables[0]); - - // Delete table - try engine.deleteTable("TestTable"); - - // Verify deleted - const tables2 = try engine.listTables(); - defer allocator.free(tables2); - try std.testing.expectEqual(@as(usize, 0), tables2.len); -} - -test "putItem and getItem with typed Items" { - const allocator = std.testing.allocator; - - const path = "/tmp/test_storage_typed"; - defer 
std.fs.deleteTreeAbsolute(path) catch {}; - - var engine = try StorageEngine.init(allocator, path); - defer engine.deinit(); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - const attr_defs = [_]types.AttributeDefinition{ - .{ .attribute_name = "pk", .attribute_type = .S }, - }; - - _ = try engine.createTable("Users", &key_schema, &attr_defs); - - // Create and put item - const item_json = "{\"pk\":{\"S\":\"user123\"},\"name\":{\"S\":\"Alice\"}}"; - var item = try json.parseItem(allocator, item_json); - defer json.deinitItem(&item, allocator); - - try engine.putItem("Users", item); - - // Get item back - const key_json = "{\"pk\":{\"S\":\"user123\"}}"; - var key = try json.parseItem(allocator, key_json); - defer json.deinitItem(&key, allocator); - - const retrieved = try engine.getItem("Users", key); - try std.testing.expect(retrieved != null); - defer if (retrieved) |*r| json.deinitItem(r, allocator); - - const pk = retrieved.?.get("pk").?; - try std.testing.expectEqualStrings("user123", pk.S); -} - -test "putItem validates key presence" { - const allocator = std.testing.allocator; - - const path = "/tmp/test_storage_validate"; - defer std.fs.deleteTreeAbsolute(path) catch {}; - - var engine = try StorageEngine.init(allocator, path); - defer engine.deinit(); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "userId", .key_type = .HASH }, - }; - const attr_defs = [_]types.AttributeDefinition{ - .{ .attribute_name = "userId", .attribute_type = .S }, - }; - - _ = try engine.createTable("Users", &key_schema, &attr_defs); - - // This should fail - missing userId - const bad_item_json = "{\"name\":{\"S\":\"Alice\"}}"; - var bad_item = try json.parseItem(allocator, bad_item_json); - defer json.deinitItem(&bad_item, allocator); - - const result = engine.putItem("Users", bad_item); - try std.testing.expectError(StorageError.MissingKeyAttribute, result); - - // This should succeed - const 
good_item_json = "{\"userId\":{\"S\":\"user123\"},\"name\":{\"S\":\"Alice\"}}"; - var good_item = try json.parseItem(allocator, good_item_json); - defer json.deinitItem(&good_item, allocator); - - try engine.putItem("Users", good_item); -} - -test "reject key with separator" { - const allocator = std.testing.allocator; - - const path = "/tmp/test_storage_separator"; - defer std.fs.deleteTreeAbsolute(path) catch {}; - - var engine = try StorageEngine.init(allocator, path); - defer engine.deinit(); - - const key_schema = [_]types.KeySchemaElement{ - .{ .attribute_name = "pk", .key_type = .HASH }, - }; - const attr_defs = [_]types.AttributeDefinition{ - .{ .attribute_name = "pk", .attribute_type = .S }, - }; - - _ = try engine.createTable("Users", &key_schema, &attr_defs); - - // This should fail - pk contains ':' - const bad_item_json = "{\"pk\":{\"S\":\"user:123\"},\"data\":{\"S\":\"test\"}}"; - var bad_item = try json.parseItem(allocator, bad_item_json); - defer json.deinitItem(&bad_item, allocator); - - const result = engine.putItem("Users", bad_item); - try std.testing.expectError(StorageError.KeyValueContainsSeparator, result); -} diff --git a/src/dynamodb/types.zig b/src/dynamodb/types.zig index 528738d..c96a80a 100644 --- a/src/dynamodb/types.zig +++ b/src/dynamodb/types.zig @@ -242,9 +242,3 @@ pub const json = struct { try writer.writeByte('}'); } }; - -test "operation from target" { - try std.testing.expectEqual(Operation.CreateTable, Operation.fromTarget("DynamoDB_20120810.CreateTable")); - try std.testing.expectEqual(Operation.PutItem, Operation.fromTarget("DynamoDB_20120810.PutItem")); - try std.testing.expectEqual(Operation.Unknown, Operation.fromTarget("Invalid")); -} diff --git a/src/http.zig b/src/http.zig index 4d0900a..a0700d3 100644 --- a/src/http.zig +++ b/src/http.zig @@ -297,26 +297,3 @@ fn parseRequest(allocator: mem.Allocator, data: []const u8) !Request { .raw_data = data, }; } - -// Tests -test "parse simple request" { - const allocator = 
std.testing.allocator; - const raw = "GET /health HTTP/1.1\r\nHost: localhost\r\n\r\n"; - - const req = try parseRequest(allocator, raw); - defer allocator.free(req.headers); - - try std.testing.expectEqual(Method.GET, req.method); - try std.testing.expectEqualStrings("/health", req.path); -} - -test "parse request with body" { - const allocator = std.testing.allocator; - const raw = "POST /items HTTP/1.1\r\nHost: localhost\r\nContent-Length: 13\r\n\r\n{\"key\":\"val\"}"; - - const req = try parseRequest(allocator, raw); - defer allocator.free(req.headers); - - try std.testing.expectEqual(Method.POST, req.method); - try std.testing.expectEqualStrings("{\"key\":\"val\"}", req.body); -} diff --git a/src/key_codec.zig b/src/key_codec.zig new file mode 100644 index 0000000..840cd72 --- /dev/null +++ b/src/key_codec.zig @@ -0,0 +1,344 @@ +/// Binary-safe key encoding for RocksDB storage +/// Replaces text-based `:` separator with length-prefixed binary format +/// Format: [entity_type_byte][len(segment1)][segment1][len(segment2)][segment2]... 
+const std = @import("std"); +const types = @import("dynamodb/types.zig"); + +/// Entity type prefix bytes for namespacing +pub const EntityType = enum(u8) { + /// Table metadata: 0x01 + meta = 0x01, + /// Item data: 0x02 + data = 0x02, + /// Global secondary index: 0x03 + gsi = 0x03, + /// Local secondary index: 0x04 + lsi = 0x04, + + pub fn toByte(self: EntityType) u8 { + return @intFromEnum(self); + } +}; + +/// Key component representing a single segment in the key +pub const KeySegment = struct { + data: []const u8, + + pub fn init(data: []const u8) KeySegment { + return .{ .data = data }; + } + + /// Encode this segment with length prefix: [len][data] + /// Length is encoded as varint for space efficiency + pub fn encode(self: KeySegment, writer: anytype) !void { + try encodeVarint(writer, self.data.len); + try writer.writeAll(self.data); + } + + /// Calculate encoded size without actually encoding + pub fn encodedSize(self: KeySegment) usize { + return varintSize(self.data.len) + self.data.len; + } +}; + +/// Represents a complete storage key with all its components +pub const StorageKey = struct { + entity_type: EntityType, + segments: []const KeySegment, + + /// Encode the complete key: [entity_type][segment1][segment2]... 
+ pub fn encode(self: StorageKey, allocator: std.mem.Allocator) ![]u8 { + var size: usize = 1; // entity type byte + for (self.segments) |seg| { + size += seg.encodedSize(); + } + + const buf = try allocator.alloc(u8, size); + var fbs = std.io.fixedBufferStream(buf); + const writer = fbs.writer(); + + try writer.writeByte(self.entity_type.toByte()); + for (self.segments) |seg| { + try seg.encode(writer); + } + + return buf; + } + + /// Calculate the encoded size without allocating + pub fn encodedSize(self: StorageKey) usize { + var size: usize = 1; // entity type byte + for (self.segments) |seg| { + size += seg.encodedSize(); + } + return size; + } +}; + +/// Decode a binary key back into its components +pub const KeyDecoder = struct { + data: []const u8, + pos: usize, + + pub fn init(data: []const u8) KeyDecoder { + return .{ .data = data, .pos = 0 }; + } + + /// Read the entity type byte + pub fn readEntityType(self: *KeyDecoder) !EntityType { + if (self.pos >= self.data.len) return error.UnexpectedEndOfKey; + const byte = self.data[self.pos]; + self.pos += 1; + return @enumFromInt(byte); + } + + /// Read the next segment + pub fn readSegment(self: *KeyDecoder, allocator: std.mem.Allocator) ![]u8 { + const len = try self.readVarint(); + if (self.pos + len > self.data.len) return error.UnexpectedEndOfKey; + + const segment = try allocator.dupe(u8, self.data[self.pos .. self.pos + len]); + self.pos += len; + return segment; + } + + /// Read segment without allocating (returns slice into original data) + pub fn readSegmentBorrowed(self: *KeyDecoder) ![]const u8 { + const len = try self.readVarint(); + if (self.pos + len > self.data.len) return error.UnexpectedEndOfKey; + + const segment = self.data[self.pos .. 
self.pos + len];
+        self.pos += len;
+        return segment;
+    }
+
+    /// Check if there are more bytes to read
+    pub fn hasMore(self: *KeyDecoder) bool {
+        return self.pos < self.data.len;
+    }
+
+    fn readVarint(self: *KeyDecoder) !usize { // LEB128 decode: error.VarintOverflow past usize width, error.UnexpectedEndOfKey on truncated input
+        var result: usize = 0;
+        var shift: usize = 0; // counted in usize (not u6) so `shift += 7` can never overflow before the bound check runs
+
+        while (self.pos < self.data.len) {
+            const byte = self.data[self.pos];
+            self.pos += 1;
+
+            result |= @as(usize, byte & 0x7F) << @as(std.math.Log2Int(usize), @intCast(shift)); // shift < bit width is guaranteed by the check below
+
+            if ((byte & 0x80) == 0) {
+                return result;
+            }
+
+            shift += 7;
+            if (shift >= @bitSizeOf(usize)) return error.VarintOverflow;
+        }
+
+        return error.UnexpectedEndOfKey;
+    }
+};
+
+/// Build a metadata key: [meta][table_name]
+pub fn buildMetaKey(allocator: std.mem.Allocator, table_name: []const u8) ![]u8 {
+    const segments = [_]KeySegment{
+        KeySegment.init(table_name),
+    };
+
+    const key = StorageKey{
+        .entity_type = .meta,
+        .segments = &segments,
+    };
+
+    return try key.encode(allocator);
+}
+
+/// Build a data key: [data][table_name][pk_value][sk_value?]
+pub fn buildDataKey(
+    allocator: std.mem.Allocator,
+    table_name: []const u8,
+    pk_value: []const u8,
+    sk_value: ?[]const u8,
+) ![]u8 {
+    if (sk_value) |sk| {
+        const segments = [_]KeySegment{
+            KeySegment.init(table_name),
+            KeySegment.init(pk_value),
+            KeySegment.init(sk),
+        };
+        const key = StorageKey{
+            .entity_type = .data,
+            .segments = &segments,
+        };
+        return try key.encode(allocator);
+    } else {
+        const segments = [_]KeySegment{
+            KeySegment.init(table_name),
+            KeySegment.init(pk_value),
+        };
+        const key = StorageKey{
+            .entity_type = .data,
+            .segments = &segments,
+        };
+        return try key.encode(allocator);
+    }
+}
+
+/// Build a prefix for scanning all items in a table: [data][table_name]
+pub fn buildTablePrefix(allocator: std.mem.Allocator, table_name: []const u8) ![]u8 {
+    const segments = [_]KeySegment{
+        KeySegment.init(table_name),
+    };
+
+    const key = StorageKey{
+        .entity_type = .data,
+        .segments = &segments,
+    };
+
+    return try key.encode(allocator);
+}
+
+/// Build a prefix for querying by partition key: [data][table_name][pk_value]
+pub fn buildPartitionPrefix(
+    allocator: std.mem.Allocator,
+    table_name: []const u8,
+    pk_value: []const u8,
+) ![]u8 {
+    const segments = [_]KeySegment{
+        KeySegment.init(table_name),
+        KeySegment.init(pk_value),
+    };
+
+    const key = StorageKey{
+        .entity_type = .data,
+        .segments = &segments,
+    };
+
+    return try key.encode(allocator);
+}
+
+/// Build a GSI key: [gsi][table_name][index_name][gsi_pk][gsi_sk?] -> stores primary key
+pub fn buildGSIKey(
+    allocator: std.mem.Allocator,
+    table_name: []const u8,
+    index_name: []const u8,
+    gsi_pk: []const u8,
+    gsi_sk: ?[]const u8,
+) ![]u8 {
+    if (gsi_sk) |sk| {
+        const segments = [_]KeySegment{
+            KeySegment.init(table_name),
+            KeySegment.init(index_name),
+            KeySegment.init(gsi_pk),
+            KeySegment.init(sk),
+        };
+        const key = StorageKey{
+            .entity_type = .gsi,
+            .segments = &segments,
+        };
+        return try key.encode(allocator);
+    } else {
+        const segments = [_]KeySegment{
+            KeySegment.init(table_name),
+            KeySegment.init(index_name),
+            KeySegment.init(gsi_pk),
+        };
+        const key = StorageKey{
+            .entity_type = .gsi,
+            .segments = &segments,
+        };
+        return try key.encode(allocator);
+    }
+}
+
+/// Build an LSI key: [lsi][table_name][index_name][pk][lsi_sk]
+pub fn buildLSIKey(
+    allocator: std.mem.Allocator,
+    table_name: []const u8,
+    index_name: []const u8,
+    pk: []const u8,
+    lsi_sk: []const u8,
+) ![]u8 {
+    const segments = [_]KeySegment{
+        KeySegment.init(table_name),
+        KeySegment.init(index_name),
+        KeySegment.init(pk),
+        KeySegment.init(lsi_sk),
+    };
+
+    const key = StorageKey{
+        .entity_type = .lsi,
+        .segments = &segments,
+    };
+
+    return try key.encode(allocator);
+}
+
+/// Debug helper: convert binary key to human-readable string (caller owns the returned slice)
+pub fn keyToDebugString(allocator: std.mem.Allocator, key: []const u8) ![]u8 {
+    var decoder = KeyDecoder.init(key);
+
+    var buf = std.ArrayList(u8).init(allocator);
+    errdefer buf.deinit();
+    const writer = buf.writer();
+
+    const entity_type = decoder.readEntityType() catch |err| {
+        try writer.print("INVALID_KEY: {}", .{err});
+        return buf.toOwnedSlice();
+    };
+
+    try writer.print("[{}]", .{entity_type});
+
+    var segment_num: usize = 0; // incremented per segment below but never read
+    while (decoder.hasMore()) {
+        const segment = decoder.readSegmentBorrowed() catch |err| {
+            try writer.print(" ERROR:{}", .{err});
+            break;
+        };
+
+        // Print the segment quoted when it is valid UTF-8, otherwise as uppercase hex
+        if (std.unicode.utf8ValidateSlice(segment)) {
+            try writer.print(" '{s}'", .{segment});
+        } else {
+            try writer.writeAll(" 0x");
+            for (segment) |byte| {
+                try writer.print("{X:0>2}", .{byte});
+            }
+        }
+        segment_num += 1;
+    }
+
+    return buf.toOwnedSlice();
+}
+
+// ============================================================================
+// Varint encoding helpers (variable-length integer encoding)
+// Uses LEB128 format: 7 bits per byte, MSB indicates continuation
+// ============================================================================
+
+fn encodeVarint(writer: anytype, value: usize) !void {
+    var v = value;
+    while (true) {
+        const byte = @as(u8, @intCast(v & 0x7F));
+        v >>= 7;
+
+        if (v == 0) {
+            try writer.writeByte(byte);
+            return;
+        } else {
+            try writer.writeByte(byte | 0x80);
+        }
+    }
+}
+
+fn varintSize(value: usize) usize {
+    if (value == 0) return 1;
+
+    var v = value;
+    var size: usize = 0;
+    while (v > 0) {
+        size += 1;
+        v >>= 7;
+    }
+    return size;
+}
diff --git a/src/rocksdb.zig b/src/rocksdb.zig
index f0f1a02..21eaa61 100644
--- a/src/rocksdb.zig
+++ b/src/rocksdb.zig
@@ -231,78 +231,3 @@ pub const Iterator = struct {
         return v[0..len];
     }
 };
-
-// Tests
-test "rocksdb basic operations" {
-    const allocator = std.testing.allocator;
-
-    // Use temp directory
-    const path = "/tmp/test_rocksdb_basic";
-    defer {
-        std.fs.deleteTreeAbsolute(path) catch {};
-    }
-
-    var db = try DB.open(path, true);
-    defer db.close();
-
-    // Put and get
-    try db.put("hello", "world");
-    const val = try db.get(allocator, "hello");
-    try std.testing.expectEqualStrings("world", val.?);
-    allocator.free(val.?);
-
-    // Delete
-    try db.delete("hello");
-    const deleted = try db.get(allocator, "hello");
-    try std.testing.expect(deleted == null);
-}
-
-test "rocksdb write batch" {
-    const allocator = std.testing.allocator;
-
-    const path = "/tmp/test_rocksdb_batch";
-    defer {
-        std.fs.deleteTreeAbsolute(path) catch {};
-    }
-
-    var db = try DB.open(path, true);
-    defer db.close();
-
-    var batch = WriteBatch.init() orelse unreachable;
-    defer batch.deinit();
-
-    batch.put("key1", "value1");
-    batch.put("key2", "value2");
-    batch.put("key3", "value3");
-
-    try batch.write(&db);
-
-    const v1 = try db.get(allocator, "key1");
-    defer if (v1) |v| allocator.free(v);
-    try std.testing.expectEqualStrings("value1", v1.?);
-}
-
-test "rocksdb iterator" {
-    const path = "/tmp/test_rocksdb_iter";
-    defer {
-        std.fs.deleteTreeAbsolute(path) catch {};
-    }
-
-    var db = try DB.open(path, true);
-    defer db.close();
-
-    try db.put("a", "1");
-    try db.put("b", "2");
-    try db.put("c", "3");
-
-    var iter = Iterator.init(&db) orelse unreachable;
-    defer iter.deinit();
-
-    iter.seekToFirst();
-
-    var count: usize = 0;
-    while (iter.valid()) : (iter.next()) {
-        count += 1;
-    }
-    try std.testing.expectEqual(@as(usize, 3), count);
-}