//! DynamoDB Expression Parser
//!
//! Parses KeyConditionExpression, FilterExpression, ProjectionExpression, etc.
//! Replaces the temporary string-search hack with proper expression parsing.

const std = @import("std");
const types = @import("types.zig");
const json_module = @import("json.zig");

// ============================================================================
// Key Condition Expression Parsing
// ============================================================================

/// Parsed key condition for Query operations.
pub const KeyCondition = struct {
    /// Partition key attribute name (from ExpressionAttributeNames or direct).
    /// Borrowed from the expression / names map unless `pk_name_owned` is set.
    pk_name: []const u8,
    /// Partition key value (owned)
    pk_value: types.AttributeValue,
    /// Sort key condition (optional)
    sk_condition: ?SortKeyCondition,
    /// Backing storage for `pk_name` when this struct owns the name.
    /// Null when `pk_name` is borrowed. Freed by `deinit`.
    pk_name_owned: ?[]u8 = null,

    /// Release the owned partition key value, the sort key condition (if any)
    /// and the owned name storage (if any).
    pub fn deinit(self: *KeyCondition, allocator: std.mem.Allocator) void {
        json_module.deinitAttributeValue(&self.pk_value, allocator);
        if (self.sk_condition) |*sk| {
            sk.deinit(allocator);
        }
        if (self.pk_name_owned) |buf| {
            allocator.free(buf);
        }
    }

    /// Get the raw partition key value bytes (for building storage keys).
    /// Returns error.InvalidKeyType for any variant other than S, N or B.
    pub fn getPkBytes(self: *const KeyCondition) ![]const u8 {
        return switch (self.pk_value) {
            .S => |s| s,
            .N => |n| n,
            .B => |b| b,
            else => error.InvalidKeyType,
        };
    }
};

/// Sort key condition operators
pub const SortKeyOperator = enum {
    EQ, // =
    LT, // <
    LE, // <=
    GT, // >
    GE, // >=
    BETWEEN, // BETWEEN x AND y
    BEGINS_WITH, // begins_with(sk, prefix)
};

/// Parsed sort key condition
pub const SortKeyCondition = struct {
    /// Sort key attribute name.
    /// Borrowed from the expression / names map unless `sk_name_owned` is set.
    sk_name: []const u8,
    /// Comparison operator
    operator: SortKeyOperator,
    /// Primary value (or lower bound for BETWEEN) - owned
    value: types.AttributeValue,
    /// Upper bound for BETWEEN operator - owned
    value2: ?types.AttributeValue,
    /// Backing storage for `sk_name` when this struct owns the name.
    /// Null when `sk_name` is borrowed. Freed by `deinit`.
    sk_name_owned: ?[]u8 = null,

    /// Release the owned value(s) and the owned name storage (if any).
    pub fn deinit(self: *SortKeyCondition, allocator: std.mem.Allocator) void {
        json_module.deinitAttributeValue(&self.value, allocator);
        if (self.value2) |*v2| {
            json_module.deinitAttributeValue(v2, allocator);
        }
        if (self.sk_name_owned) |buf| {
            allocator.free(buf);
        }
    }
};

/// Parse a KeyConditionExpression with ExpressionAttributeNames and
/// ExpressionAttributeValues.
/// Returns owned KeyCondition - caller must call deinit().
///
/// The attribute names in the result are borrowed: they point either into
/// `expression` or into the values of `attribute_names`, so both must outlive
/// the returned condition.
///
/// Returns error.InvalidExpression for malformed input, including any tokens
/// left over after a complete condition, and error.MissingAttributeValue when
/// a `:placeholder` is not present in `attribute_values`.
///
/// Supported formats:
/// - "pk = :pk"
/// - "#pk = :pk"
/// - "pk = :pk AND sk = :sk"
/// - "pk = :pk AND sk > :sk"
/// - "pk = :pk AND sk BETWEEN :sk1 AND :sk2"
/// - "pk = :pk AND begins_with(sk, :prefix)"
pub fn parseKeyConditionExpression(
    allocator: std.mem.Allocator,
    expression: []const u8,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !KeyCondition {
    var tokenizer = Tokenizer.init(expression);

    // Parse partition key condition: pk_name = :pk_value
    const pk_name_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const pk_name = resolveAttributeName(pk_name_token, attribute_names) orelse return error.InvalidExpression;

    const eq_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, eq_token, "=")) return error.InvalidExpression;

    const pk_value_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var pk_value = try resolveAttributeValue(allocator, pk_value_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&pk_value, allocator);

    // Check for AND (sort key condition)
    var sk_condition: ?SortKeyCondition = null;
    // Needed because the trailing-token check below can fail after the sort
    // key condition has already been built.
    errdefer if (sk_condition) |*sk| sk.deinit(allocator);

    if (tokenizer.nextToken()) |and_token| {
        if (!std.ascii.eqlIgnoreCase(and_token, "AND")) {
            return error.InvalidExpression;
        }
        sk_condition = try parseSortKeyCondition(allocator, &tokenizer, attribute_names, attribute_values);

        // A key condition holds at most one sort key clause; anything left
        // over means the expression was not fully understood.
        if (tokenizer.nextToken() != null) return error.InvalidExpression;
    }

    return KeyCondition{
        .pk_name = pk_name,
        .pk_value = pk_value,
        .sk_condition = sk_condition,
    };
}

/// Parse the sort key clause that follows "AND": either
/// `begins_with(name, :value)` or `name <op> :value [AND :value2]`.
fn parseSortKeyCondition(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    const first_token = tokenizer.nextToken() orelse return error.InvalidExpression;

    // Check for begins_with(sk, :value)
    if (std.ascii.eqlIgnoreCase(first_token, "begins_with")) {
        return try parseBeginsWith(allocator, tokenizer, attribute_names, attribute_values);
    }

    // Otherwise it's: sk_name operator :value
    const sk_name = resolveAttributeName(first_token, attribute_names) orelse return error.InvalidExpression;

    const op_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const operator = parseOperator(op_token) orelse return error.InvalidExpression;

    const value_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var value = try resolveAttributeValue(allocator, value_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&value, allocator);

    // Check for BETWEEN ... AND ...
    var value2: ?types.AttributeValue = null;
    if (operator == .BETWEEN) {
        const and_token = tokenizer.nextToken() orelse return error.InvalidExpression;
        if (!std.ascii.eqlIgnoreCase(and_token, "AND")) {
            return error.InvalidExpression;
        }
        const value2_token = tokenizer.nextToken() orelse return error.InvalidExpression;
        value2 = try resolveAttributeValue(allocator, value2_token, attribute_values);
    }

    return SortKeyCondition{
        .sk_name = sk_name,
        .operator = operator,
        .value = value,
        .value2 = value2,
    };
}

/// Parse the argument list of `begins_with`; the function name itself has
/// already been consumed by the caller.
fn parseBeginsWith(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    // Expect: ( sk_name , :value )
    const lparen = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, lparen, "(")) return error.InvalidExpression;

    const sk_name_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const sk_name = resolveAttributeName(sk_name_token, attribute_names) orelse return error.InvalidExpression;

    const comma = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, comma, ",")) return error.InvalidExpression;

    const value_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var value = try resolveAttributeValue(allocator, value_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&value, allocator);

    const rparen = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, rparen, ")")) return error.InvalidExpression;

    return SortKeyCondition{
        .sk_name = sk_name,
        .operator = .BEGINS_WITH,
        .value = value,
        .value2 = null,
    };
}

/// Map an operator token to a SortKeyOperator; null for anything else.
/// BEGINS_WITH is never produced here (it is handled by parseBeginsWith).
fn parseOperator(token: []const u8) ?SortKeyOperator {
    if (std.mem.eql(u8, token, "=")) return .EQ;
    if (std.mem.eql(u8, token, "<")) return .LT;
    if (std.mem.eql(u8, token, "<=")) return .LE;
    if (std.mem.eql(u8, token, ">")) return .GT;
    if (std.mem.eql(u8, token, ">=")) return .GE;
    if (std.ascii.eqlIgnoreCase(token, "BETWEEN")) return .BETWEEN;
    return null;
}

/// Resolve an attribute name token.
/// A token starting with '#' is looked up in `names` (null when the map is
/// absent or has no such entry); any other token is returned unchanged.
/// The result is borrowed from `token` or from the map.
fn resolveAttributeName(token: []const u8, names: ?std.StringHashMap([]const u8)) ?[]const u8 {
    if (token.len > 0 and token[0] == '#') {
        // Expression attribute name placeholder
        if (names) |n| {
            return n.get(token);
        }
        return null;
    }
    // Direct attribute name
    return token;
}

/// Resolve a `:placeholder` token to a deep copy of its value.
/// Returns error.MissingAttributeValue when the placeholder is unknown and
/// error.InvalidExpression when the token is not a placeholder at all.
fn resolveAttributeValue(
    allocator: std.mem.Allocator,
    token: []const u8,
    values: std.StringHashMap(types.AttributeValue),
) !types.AttributeValue {
    if (token.len > 0 and token[0] == ':') {
        // Expression attribute value placeholder
        const original = values.get(token) orelse return error.MissingAttributeValue;
        return try json_module.deepCopyAttributeValue(allocator, original);
    }
    return error.InvalidExpression;
}

// ============================================================================
// Request Parsing Helpers
// ============================================================================

/// Parse ExpressionAttributeNames from request body.
/// Returns null if the body is not a JSON object or the field is absent or
/// not an object. Entries whose value is not a string are skipped.
/// Keys and values of the returned map are owned copies; free them with
/// deinitExpressionAttributeNames(). error.OutOfMemory is propagated.
pub fn parseExpressionAttributeNames(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !?std.StringHashMap([]const u8) {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        // Allocation failure is not "field absent"; let the caller see it.
        error.OutOfMemory => return err,
        else => return null,
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return null,
    };

    const names_val = root.get("ExpressionAttributeNames") orelse return null;
    const names_obj = switch (names_val) {
        .object => |o| o,
        else => return null,
    };

    var result = std.StringHashMap([]const u8).init(allocator);
    errdefer deinitExpressionAttributeNames(&result, allocator);

    var iter = names_obj.iterator();
    while (iter.next()) |entry| {
        // Check the value kind first so nothing is allocated for skipped entries.
        const name = switch (entry.value_ptr.*) {
            .string => |s| s,
            else => continue,
        };

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);
        const value = try allocator.dupe(u8, name);
        // Until put() succeeds the map does not own `value`.
        errdefer allocator.free(value);

        try result.put(key, value);
    }

    return result;
}

/// Parse ExpressionAttributeValues from request body.
/// Returns owned HashMap - caller must free it with
/// deinitExpressionAttributeValues(). The map is empty when the body is not a
/// JSON object or the field is absent or not an object. Entries whose value
/// fails to parse are skipped. error.OutOfMemory from JSON parsing or map
/// growth is propagated.
pub fn parseExpressionAttributeValues(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !std.StringHashMap(types.AttributeValue) {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        error.OutOfMemory => return err,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };

    const values_val = root.get("ExpressionAttributeValues") orelse
        return std.StringHashMap(types.AttributeValue).init(allocator);
    const values_obj = switch (values_val) {
        .object => |o| o,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };

    var result = std.StringHashMap(types.AttributeValue).init(allocator);
    errdefer deinitExpressionAttributeValues(&result, allocator);

    var iter = values_obj.iterator();
    while (iter.next()) |entry| {
        // Parse the value before duplicating the key: skipping a bad entry
        // then has nothing to free.
        var value = json_module.parseAttributeValue(allocator, entry.value_ptr.*) catch continue;
        errdefer json_module.deinitAttributeValue(&value, allocator);

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);

        try result.put(key, value);
    }

    return result;
}

/// Parse KeyConditionExpression string from request body.
///
/// Returns the raw bytes between the quotes of the first
/// `"KeyConditionExpression": "..."` occurrence, as a slice of `request_body`.
/// JSON escapes are NOT decoded. Returns null when the marker is missing, the
/// value is not a string, or the string is unterminated.
pub fn parseKeyConditionExpressionString(
    request_body: []const u8,
) ?[]const u8 {
    // Use a simple search to avoid allocation for this common operation
    const marker = "\"KeyConditionExpression\"";
    const start_idx = std.mem.indexOf(u8, request_body, marker) orelse return null;

    // Expect: <ws> ':' <ws> '"'
    var pos = start_idx + marker.len;
    while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
    if (pos >= request_body.len or request_body[pos] != ':') return null;
    pos += 1;
    while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
    if (pos >= request_body.len or request_body[pos] != '"') return null;
    pos += 1; // Skip opening quote

    // Find the closing quote. Escapes are consumed as pairs so that an escaped
    // backslash right before the closing quote is not mistaken for an escaped
    // quote.
    const value_start = pos;
    while (pos < request_body.len) : (pos += 1) {
        switch (request_body[pos]) {
            '\\' => pos += 1, // skip the escaped character as well
            '"' => return request_body[value_start..pos],
            else => {},
        }
    }

    return null;
}

/// Convenience function to parse and evaluate a complete Query key condition.
/// Returns null when the body has no KeyConditionExpression string.
/// Returns owned KeyCondition - caller must call deinit().
///
/// The attribute names in the result are copied into storage owned by the
/// condition, so it stays valid after this function has freed the temporary
/// ExpressionAttributeNames map.
pub fn parseQueryKeyCondition(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !?KeyCondition {
    // Parse expression string
    const expression = parseKeyConditionExpressionString(request_body) orelse return null;

    // Parse attribute names (optional)
    var attr_names = try parseExpressionAttributeNames(allocator, request_body);
    defer if (attr_names) |*names| {
        deinitExpressionAttributeNames(names, allocator);
    };

    // Parse attribute values
    var attr_values = try parseExpressionAttributeValues(allocator, request_body);
    defer deinitExpressionAttributeValues(&attr_values, allocator);

    var condition = try parseKeyConditionExpression(allocator, expression, attr_names, attr_values);
    errdefer condition.deinit(allocator);

    // Names resolved through '#placeholders' point into `attr_names`, which is
    // freed when this function returns. Give the condition its own copies.
    const pk_copy = try allocator.dupe(u8, condition.pk_name);
    condition.pk_name_owned = pk_copy;
    condition.pk_name = pk_copy;

    if (condition.sk_condition) |*sk| {
        const sk_copy = try allocator.dupe(u8, sk.sk_name);
        sk.sk_name_owned = sk_copy;
        sk.sk_name = sk_copy;
    }

    return condition;
}

// ============================================================================
// Simple Tokenizer
// ============================================================================

/// Splits an expression into tokens: parentheses, commas, comparison
/// operators, and identifier-like words (including `:value` and `#name`
/// placeholders). Tokens are slices of the input; nothing is allocated.
const Tokenizer = struct {
    input: []const u8,
    pos: usize,

    pub fn init(input: []const u8) Tokenizer {
        return .{ .input = input, .pos = 0 };
    }

    /// Characters that may appear in an identifier, keyword or placeholder.
    fn isIdentifierChar(ch: u8) bool {
        return std.ascii.isAlphanumeric(ch) or ch == '_' or ch == ':' or ch == '#' or ch == '-';
    }

    /// Return the next token, or null at end of input.
    /// Characters that cannot start any token are skipped.
    pub fn nextToken(self: *Tokenizer) ?[]const u8 {
        // Loop (rather than recurse) so a long run of unknown characters
        // cannot grow the call stack.
        while (true) {
            // Skip whitespace
            while (self.pos < self.input.len and std.ascii.isWhitespace(self.input[self.pos])) {
                self.pos += 1;
            }

            if (self.pos >= self.input.len) return null;

            const start = self.pos;
            const c = self.input[start];

            // Single-character tokens
            if (c == '(' or c == ')' or c == ',') {
                self.pos += 1;
                return self.input[start..self.pos];
            }

            // Two-character operators
            if (start + 1 < self.input.len) {
                const two = self.input[start .. start + 2];
                if (std.mem.eql(u8, two, "<=") or
                    std.mem.eql(u8, two, ">=") or
                    std.mem.eql(u8, two, "<>"))
                {
                    self.pos += 2;
                    return two;
                }
            }

            // Single-character operators
            if (c == '=' or c == '<' or c == '>') {
                self.pos += 1;
                return self.input[start..self.pos];
            }

            // Identifier or keyword (includes :placeholder and #name)
            while (self.pos < self.input.len and isIdentifierChar(self.input[self.pos])) {
                self.pos += 1;
            }
            if (self.pos > start) {
                return self.input[start..self.pos];
            }

            // Unknown character, skip it and try again
            self.pos += 1;
        }
    }
};

// ============================================================================
// Helpers for freeing parsed expression data
// ============================================================================

/// Free every key and value of a names map, then the map itself.
pub fn deinitExpressionAttributeNames(names: *std.StringHashMap([]const u8), allocator: std.mem.Allocator) void {
    var iter = names.iterator();
    while (iter.next()) |entry| {
        allocator.free(entry.key_ptr.*);
        allocator.free(entry.value_ptr.*);
    }
    names.deinit();
}

/// Free every key and value of a values map, then the map itself.
pub fn deinitExpressionAttributeValues(values: *std.StringHashMap(types.AttributeValue), allocator: std.mem.Allocator) void {
    var iter = values.iterator();
    while (iter.next()) |entry| {
        allocator.free(entry.key_ptr.*);
        json_module.deinitAttributeValue(entry.value_ptr, allocator);
    }
    values.deinit();
}

// ============================================================================
// Tests
// ============================================================================

test "tokenizer basic" {
    var t = Tokenizer.init("pk = :pk AND sk > :sk");

    try std.testing.expectEqualStrings("pk", t.nextToken().?);
    try std.testing.expectEqualStrings("=", t.nextToken().?);
    try std.testing.expectEqualStrings(":pk", t.nextToken().?);
    try std.testing.expectEqualStrings("AND", t.nextToken().?);
    try std.testing.expectEqualStrings("sk", t.nextToken().?);
    try std.testing.expectEqualStrings(">", t.nextToken().?);
    try std.testing.expectEqualStrings(":sk", t.nextToken().?);
    try std.testing.expect(t.nextToken() == null);
}

test "tokenizer begins_with" {
    var t = Tokenizer.init("pk = :pk AND begins_with(sk, :prefix)");

    try std.testing.expectEqualStrings("pk", t.nextToken().?);
    try std.testing.expectEqualStrings("=", t.nextToken().?);
    try std.testing.expectEqualStrings(":pk", t.nextToken().?);
    try std.testing.expectEqualStrings("AND", t.nextToken().?);
    try std.testing.expectEqualStrings("begins_with", t.nextToken().?);
    try std.testing.expectEqualStrings("(", t.nextToken().?);
    try std.testing.expectEqualStrings("sk", t.nextToken().?);
    try std.testing.expectEqualStrings(",", t.nextToken().?);
    try std.testing.expectEqualStrings(":prefix", t.nextToken().?);
    try std.testing.expectEqualStrings(")", t.nextToken().?);
    try std.testing.expect(t.nextToken() == null);
}

test "tokenizer skips unknown characters" {
    var t = Tokenizer.init("pk !!! = :pk");

    try std.testing.expectEqualStrings("pk", t.nextToken().?);
    try std.testing.expectEqualStrings("=", t.nextToken().?);
    try std.testing.expectEqualStrings(":pk", t.nextToken().?);
    try std.testing.expect(t.nextToken() == null);
}

test "key condition string extraction" {
    // Plain value
    try std.testing.expectEqualStrings(
        "pk = :pk",
        parseKeyConditionExpressionString("{\"KeyConditionExpression\": \"pk = :pk\"}").?,
    );
    // Value ending in an escaped backslash must stop at its own closing quote
    try std.testing.expectEqualStrings(
        "a\\\\",
        parseKeyConditionExpressionString("{\"KeyConditionExpression\":\"a\\\\\",\"X\":\"y\"}").?,
    );
    // Escaped quote stays inside the value
    try std.testing.expectEqualStrings(
        "a\\\"b",
        parseKeyConditionExpressionString("{\"KeyConditionExpression\":\"a\\\"b\"}").?,
    );
    // Non-string value and missing key
    try std.testing.expect(parseKeyConditionExpressionString("{\"KeyConditionExpression\": 5, \"x\": \"y\"}") == null);
    try std.testing.expect(parseKeyConditionExpressionString("{\"TableName\":\"t\"}") == null);
}