//! DynamoDB Expression Parser
//! Parses KeyConditionExpression, FilterExpression, ProjectionExpression, etc.
//! Replaces the temporary string-search hack with proper expression parsing.
|
|
const std = @import("std");
|
|
const types = @import("types.zig");
|
|
const json_module = @import("json.zig");
|
|
|
|
// ============================================================================
|
|
// Key Condition Expression Parsing
|
|
// ============================================================================
|
|
|
|
/// Result of parsing a Query `KeyConditionExpression`.
pub const KeyCondition = struct {
    /// Name of the partition key attribute, either written directly in the
    /// expression or looked up through ExpressionAttributeNames.
    pk_name: []const u8,
    /// Value the partition key is compared against (owned; released by `deinit`).
    pk_value: types.AttributeValue,
    /// Condition on the sort key, or null when the expression has none.
    sk_condition: ?SortKeyCondition,

    /// Releases `pk_value` and, when present, the sort key condition.
    /// `pk_name` is not freed here.
    pub fn deinit(self: *KeyCondition, allocator: std.mem.Allocator) void {
        json_module.deinitAttributeValue(&self.pk_value, allocator);
        if (self.sk_condition) |*sort_key| sort_key.deinit(allocator);
    }

    /// Returns the raw bytes of the partition key value (for building storage keys).
    /// Only the `.S`, `.N` and `.B` variants are accepted; any other variant
    /// yields `error.InvalidKeyType`.
    pub fn getPkBytes(self: *const KeyCondition) ![]const u8 {
        switch (self.pk_value) {
            .S => |text| return text,
            .N => |number| return number,
            .B => |binary| return binary,
            else => return error.InvalidKeyType,
        }
    }
};
|
|
|
|
/// Comparison operators that may appear in a sort key condition.
pub const SortKeyOperator = enum {
    /// `=`
    EQ,
    /// `<`
    LT,
    /// `<=`
    LE,
    /// `>`
    GT,
    /// `>=`
    GE,
    /// `BETWEEN x AND y`
    BETWEEN,
    /// `begins_with(sk, prefix)`
    BEGINS_WITH,
};
|
|
|
|
/// A single parsed condition on the sort key.
pub const SortKeyCondition = struct {
    /// Name of the sort key attribute.
    sk_name: []const u8,
    /// How the sort key is compared against `value` (and `value2`).
    operator: SortKeyOperator,
    /// Comparison operand, or the lower bound for BETWEEN (owned).
    value: types.AttributeValue,
    /// Upper bound for BETWEEN (owned); null for every other operator.
    value2: ?types.AttributeValue,

    /// Releases `value` and, when set, `value2`. `sk_name` is not freed here.
    pub fn deinit(self: *SortKeyCondition, allocator: std.mem.Allocator) void {
        json_module.deinitAttributeValue(&self.value, allocator);
        if (self.value2) |*upper_bound| json_module.deinitAttributeValue(upper_bound, allocator);
    }
};
|
|
|
|
/// Parse a KeyConditionExpression using ExpressionAttributeNames and
/// ExpressionAttributeValues.
///
/// Returns an owned KeyCondition; the caller must call `deinit()` on it.
/// The attribute values are deep-copied, but `pk_name` / `sk_name` are NOT
/// copied: they alias either `expression` or a string stored in
/// `attribute_names`, so that memory must outlive the returned condition.
///
/// Supported formats:
/// - "pk = :pk"
/// - "#pk = :pk"
/// - "pk = :pk AND sk = :sk"
/// - "pk = :pk AND sk > :sk"
/// - "pk = :pk AND sk BETWEEN :sk1 AND :sk2"
/// - "pk = :pk AND begins_with(sk, :prefix)"
///
/// Errors:
/// - `error.InvalidExpression` when a token is missing or unexpected, when a
///   `#name` placeholder cannot be resolved, or when anything follows the
///   sort key clause.
/// - whatever `resolveAttributeValue` returns for the value placeholders.
pub fn parseKeyConditionExpression(
    allocator: std.mem.Allocator,
    expression: []const u8,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !KeyCondition {
    var tokenizer = Tokenizer.init(expression);

    // Partition key clause: pk_name = :pk_value
    const pk_name_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const pk_name = resolveAttributeName(pk_name_token, attribute_names) orelse return error.InvalidExpression;

    const eq_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, eq_token, "=")) return error.InvalidExpression;

    const pk_value_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var pk_value = try resolveAttributeValue(allocator, pk_value_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&pk_value, allocator);

    // Optional sort key clause introduced by AND.
    var sk_condition: ?SortKeyCondition = null;
    errdefer if (sk_condition) |*sk| sk.deinit(allocator);

    if (tokenizer.nextToken()) |and_token| {
        if (!std.ascii.eqlIgnoreCase(and_token, "AND")) {
            return error.InvalidExpression;
        }

        sk_condition = try parseSortKeyCondition(allocator, &tokenizer, attribute_names, attribute_values);

        // Anything left over (for example a third "AND ..." clause) would
        // otherwise be dropped silently and the query would run with a
        // different condition than the one requested.
        if (tokenizer.nextToken() != null) return error.InvalidExpression;
    }

    return KeyCondition{
        .pk_name = pk_name,
        .pk_value = pk_value,
        .sk_condition = sk_condition,
    };
}
|
|
|
|
/// Parses the sort key clause that follows `AND` in a key condition expression.
///
/// Two shapes are accepted:
/// - `begins_with(name, :value)`, delegated to `parseBeginsWith`;
/// - `name <op> :value`, where `<op>` is anything `parseOperator` recognises.
///   For BETWEEN a trailing `AND :value` supplies the upper bound.
///
/// Returns `error.InvalidExpression` when a token is missing or unexpected;
/// value lookups can also fail with the errors of `resolveAttributeValue`.
/// The values in the returned condition are owned by it.
fn parseSortKeyCondition(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    const head = tokenizer.nextToken() orelse return error.InvalidExpression;

    // Function form: begins_with(sk, :value)
    if (std.ascii.eqlIgnoreCase(head, "begins_with")) {
        return try parseBeginsWith(allocator, tokenizer, attribute_names, attribute_values);
    }

    // Infix form: sk_name operator :value
    const sk_name = resolveAttributeName(head, attribute_names) orelse return error.InvalidExpression;

    const operator_text = tokenizer.nextToken() orelse return error.InvalidExpression;
    const operator = parseOperator(operator_text) orelse return error.InvalidExpression;

    const lower_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var lower = try resolveAttributeValue(allocator, lower_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&lower, allocator);

    // Only BETWEEN carries a second operand: BETWEEN :lower AND :upper
    const upper: ?types.AttributeValue = if (operator == .BETWEEN) blk: {
        const joiner = tokenizer.nextToken() orelse return error.InvalidExpression;
        if (!std.ascii.eqlIgnoreCase(joiner, "AND")) return error.InvalidExpression;

        const upper_token = tokenizer.nextToken() orelse return error.InvalidExpression;
        break :blk try resolveAttributeValue(allocator, upper_token, attribute_values);
    } else null;

    return .{
        .sk_name = sk_name,
        .operator = operator,
        .value = lower,
        .value2 = upper,
    };
}
|
|
|
|
/// Parses the argument list of `begins_with`, i.e. `( sk_name , :value )`.
/// The `begins_with` keyword itself has already been consumed by the caller.
///
/// Returns a condition with operator `.BEGINS_WITH` and no `value2`.
/// Fails with `error.InvalidExpression` on a missing or unexpected token, or
/// with the errors of `resolveAttributeValue` for the prefix placeholder.
fn parseBeginsWith(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    const open = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, open, "(")) return error.InvalidExpression;

    const name_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const sk_name = resolveAttributeName(name_token, attribute_names) orelse return error.InvalidExpression;

    const separator = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, separator, ",")) return error.InvalidExpression;

    const prefix_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var prefix = try resolveAttributeValue(allocator, prefix_token, attribute_values);
    // The closing parenthesis is still unchecked, so the copy must be
    // released if that check fails.
    errdefer json_module.deinitAttributeValue(&prefix, allocator);

    const close = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, close, ")")) return error.InvalidExpression;

    return .{
        .sk_name = sk_name,
        .operator = .BEGINS_WITH,
        .value = prefix,
        .value2 = null,
    };
}
|
|
|
|
/// Maps an operator token to its `SortKeyOperator`.
/// Symbol operators are matched exactly; `BETWEEN` is matched ignoring case.
/// Returns null for any other token.
fn parseOperator(token: []const u8) ?SortKeyOperator {
    const Symbol = struct { text: []const u8, op: SortKeyOperator };
    const symbols = [_]Symbol{
        .{ .text = "=", .op = .EQ },
        .{ .text = "<", .op = .LT },
        .{ .text = "<=", .op = .LE },
        .{ .text = ">", .op = .GT },
        .{ .text = ">=", .op = .GE },
    };

    for (symbols) |symbol| {
        if (std.mem.eql(u8, token, symbol.text)) return symbol.op;
    }

    return if (std.ascii.eqlIgnoreCase(token, "BETWEEN")) .BETWEEN else null;
}
|
|
|
|
/// Resolves a name token to an attribute name.
///
/// Tokens that start with `#` are placeholders and are looked up in `names`
/// under the full token (including the `#`); the result is null when `names`
/// is null or has no such entry. Any other token is returned unchanged.
/// The returned slice is borrowed: it is either `token` itself or the string
/// stored in `names`.
fn resolveAttributeName(token: []const u8, names: ?std.StringHashMap([]const u8)) ?[]const u8 {
    const is_placeholder = token.len > 0 and token[0] == '#';
    if (!is_placeholder) return token;

    const lookup = names orelse return null;
    return lookup.get(token);
}
|
|
|
|
/// Resolves a `:placeholder` token to a deep copy of its attribute value.
///
/// Returns `error.InvalidExpression` when the token does not start with `:`,
/// and `error.MissingAttributeValue` when `values` has no entry for it.
/// The copy is made with `allocator` and is owned by the caller.
fn resolveAttributeValue(
    allocator: std.mem.Allocator,
    token: []const u8,
    values: std.StringHashMap(types.AttributeValue),
) !types.AttributeValue {
    const is_placeholder = token.len > 0 and token[0] == ':';
    if (!is_placeholder) return error.InvalidExpression;

    const stored = values.get(token) orelse return error.MissingAttributeValue;
    return try json_module.deepCopyAttributeValue(allocator, stored);
}
|
|
|
|
// ============================================================================
|
|
// Request Parsing Helpers
|
|
// ============================================================================
|
|
|
|
/// Parse ExpressionAttributeNames from a JSON request body.
///
/// Returns null when the body is not valid JSON, its root is not an object,
/// or "ExpressionAttributeNames" is absent or not an object. Entries whose
/// value is not a JSON string are skipped.
///
/// On success the caller owns the map together with every key and value in
/// it (all duplicated with `allocator`); release it with
/// `deinitExpressionAttributeNames`.
///
/// `error.OutOfMemory` is propagated rather than reported as "not present".
pub fn parseExpressionAttributeNames(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !?std.StringHashMap([]const u8) {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        // Running out of memory is not the same as "no names in the request".
        error.OutOfMemory => return err,
        else => return null,
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return null,
    };

    const names_val = root.get("ExpressionAttributeNames") orelse return null;
    const names_obj = switch (names_val) {
        .object => |o| o,
        else => return null,
    };

    var result = std.StringHashMap([]const u8).init(allocator);
    errdefer {
        var stored = result.iterator();
        while (stored.next()) |entry| {
            allocator.free(entry.key_ptr.*);
            allocator.free(entry.value_ptr.*);
        }
        result.deinit();
    }

    var iter = names_obj.iterator();
    while (iter.next()) |entry| {
        // Check the type first so nothing is allocated for skipped entries.
        const name = switch (entry.value_ptr.*) {
            .string => |s| s,
            else => continue,
        };

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);

        const value = try allocator.dupe(u8, name);
        // Until `put` succeeds the map does not own `value`, so it must be
        // released here if the insertion fails.
        errdefer allocator.free(value);

        try result.put(key, value);
    }

    return result;
}
|
|
|
|
/// Parse ExpressionAttributeValues from a JSON request body.
///
/// Always returns a map. It is empty when the body is not valid JSON, its
/// root is not an object, or "ExpressionAttributeValues" is absent or not an
/// object. Entries that `json_module.parseAttributeValue` rejects are skipped.
///
/// The caller owns the map, its keys (duplicated with `allocator`) and its
/// values; release it with `deinitExpressionAttributeValues`.
///
/// `error.OutOfMemory` from JSON parsing is propagated instead of being
/// reported as an empty map.
pub fn parseExpressionAttributeValues(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !std.StringHashMap(types.AttributeValue) {
    const ValueMap = std.StringHashMap(types.AttributeValue);

    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        error.OutOfMemory => return err,
        else => return ValueMap.init(allocator),
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return ValueMap.init(allocator),
    };

    const values_val = root.get("ExpressionAttributeValues") orelse return ValueMap.init(allocator);
    const values_obj = switch (values_val) {
        .object => |o| o,
        else => return ValueMap.init(allocator),
    };

    var result = ValueMap.init(allocator);
    errdefer {
        var stored = result.iterator();
        while (stored.next()) |entry| {
            allocator.free(entry.key_ptr.*);
            json_module.deinitAttributeValue(entry.value_ptr, allocator);
        }
        result.deinit();
    }

    var iter = values_obj.iterator();
    while (iter.next()) |entry| {
        // Parse the value BEFORE duplicating the key: `continue` does not run
        // `errdefer`, so a key allocated first would leak for every entry
        // that fails to parse.
        // NOTE(review): this skip also hides an allocation failure if
        // parseAttributeValue can return OutOfMemory — confirm against json.zig.
        var value = json_module.parseAttributeValue(allocator, entry.value_ptr.*) catch continue;
        errdefer json_module.deinitAttributeValue(&value, allocator);

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);

        try result.put(key, value);
    }

    return result;
}
|
|
|
|
/// Extract the KeyConditionExpression string from a JSON request body
/// without allocating.
///
/// The body is scanned textually for the `"KeyConditionExpression"` key. An
/// occurrence only counts when it is followed (after optional whitespace) by
/// a colon; other occurrences, such as the same text used as a string value,
/// are skipped.
///
/// Returns the bytes between the value's quotes as a slice of `request_body`.
/// JSON escape sequences are NOT decoded. Returns null when the key is
/// absent, its value is not a string, or the string is unterminated.
pub fn parseKeyConditionExpressionString(
    request_body: []const u8,
) ?[]const u8 {
    const marker = "\"KeyConditionExpression\"";

    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, request_body, search_from, marker)) |marker_idx| {
        search_from = marker_idx + marker.len;

        // Key position check: only whitespace may sit between the quoted key
        // and its colon. Otherwise this occurrence is not the key.
        var pos = search_from;
        while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
        if (pos >= request_body.len or request_body[pos] != ':') continue;
        pos += 1;

        // The value must be a string. Scanning ahead to "the next quote"
        // would return an unrelated string when the value is null, a number
        // or an object.
        while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
        if (pos >= request_body.len or request_body[pos] != '"') return null;
        pos += 1;

        const value_start = pos;
        while (pos < request_body.len) : (pos += 1) {
            switch (request_body[pos]) {
                '"' => return request_body[value_start..pos],
                // Skip the escaped character so that `\"` does not end the
                // string and `\\` followed by `"` does.
                '\\' => pos += 1,
                else => {},
            }
        }

        // Opening quote without a closing one.
        return null;
    }

    return null;
}
|
|
|
|
/// Convenience function that parses the complete key condition of a Query
/// request body.
///
/// Returns null when the body has no KeyConditionExpression string, otherwise
/// an owned KeyCondition on which the caller must call `deinit()`.
///
/// Lifetime: the attribute name and value maps built here are temporary and
/// are freed before returning. The returned `pk_name` / `sk_name` are
/// therefore always slices of `request_body`, which must outlive the result.
/// A name that does not occur verbatim in `request_body` (for example one
/// written with JSON escape sequences) cannot be represented that way and is
/// rejected with `error.InvalidExpression`.
pub fn parseQueryKeyCondition(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !?KeyCondition {
    // Expression string (a slice of request_body)
    const expression = parseKeyConditionExpressionString(request_body) orelse return null;

    // Attribute names (optional)
    var attr_names = try parseExpressionAttributeNames(allocator, request_body);
    defer if (attr_names) |*names| {
        deinitExpressionAttributeNames(names, allocator);
    };

    // Attribute values
    var attr_values = try parseExpressionAttributeValues(allocator, request_body);
    defer deinitExpressionAttributeValues(&attr_values, allocator);

    var condition = try parseKeyConditionExpression(allocator, expression, attr_names, attr_values);
    errdefer condition.deinit(allocator);

    // Names resolved through a `#placeholder` alias strings owned by
    // `attr_names`, which is freed when this function returns. Re-point every
    // name at equal bytes inside `request_body` so the result never dangles.
    condition.pk_name = findEqualBytes(request_body, condition.pk_name) orelse return error.InvalidExpression;
    if (condition.sk_condition) |*sk| {
        sk.sk_name = findEqualBytes(request_body, sk.sk_name) orelse return error.InvalidExpression;
    }

    return condition;
}

/// Returns a slice of `haystack` whose bytes equal `needle`, or null when
/// `needle` does not occur in `haystack`.
fn findEqualBytes(haystack: []const u8, needle: []const u8) ?[]const u8 {
    const start = std.mem.indexOf(u8, haystack, needle) orelse return null;
    return haystack[start .. start + needle.len];
}
|
|
|
|
// ============================================================================
|
|
// Simple Tokenizer
|
|
// ============================================================================
|
|
|
|
/// Minimal tokenizer for key condition expressions.
///
/// Tokens are slices of `input`; nothing is allocated. Recognised tokens:
/// - `(`, `)`, `,`
/// - operators `=`, `<`, `>`, `<=`, `>=`, `<>`
/// - words made of ASCII letters, digits, `_`, `:`, `#` and `-`
///   (identifiers, keywords, `:value` and `#name` placeholders)
/// Whitespace separates tokens; any other character is skipped.
const Tokenizer = struct {
    input: []const u8,
    pos: usize,

    /// Creates a tokenizer positioned at the start of `input`.
    pub fn init(input: []const u8) Tokenizer {
        return .{ .input = input, .pos = 0 };
    }

    /// Returns the next token, or null once the input is exhausted.
    ///
    /// Implemented as a loop rather than by recursion so that a long run of
    /// unrecognised characters cannot grow the call stack.
    pub fn nextToken(self: *Tokenizer) ?[]const u8 {
        while (self.pos < self.input.len) {
            const start = self.pos;
            const c = self.input[start];

            if (std.ascii.isWhitespace(c)) {
                self.pos += 1;
                continue;
            }

            switch (c) {
                // Punctuation and `=` are always single-character tokens.
                '(', ')', ',', '=' => {
                    self.pos += 1;
                    return self.input[start..self.pos];
                },
                // `<` and `>` may start a two-character operator: <=, >=, <>
                '<', '>' => {
                    const has_next = start + 1 < self.input.len;
                    const next: u8 = if (has_next) self.input[start + 1] else 0;
                    const is_pair = has_next and (next == '=' or (c == '<' and next == '>'));
                    const width: usize = if (is_pair) 2 else 1;
                    self.pos += width;
                    return self.input[start..self.pos];
                },
                else => {},
            }

            // Identifier or keyword (includes :placeholder and #name)
            while (self.pos < self.input.len and isWordChar(self.input[self.pos])) {
                self.pos += 1;
            }
            if (self.pos > start) return self.input[start..self.pos];

            // Unknown character: skip it and keep scanning.
            self.pos += 1;
        }

        return null;
    }

    /// True for characters that may appear inside a word token.
    fn isWordChar(ch: u8) bool {
        return std.ascii.isAlphanumeric(ch) or ch == '_' or ch == ':' or ch == '#' or ch == '-';
    }
};
|
|
|
|
// ============================================================================
|
|
// Helpers for freeing parsed expression data
|
|
// ============================================================================
|
|
|
|
/// Frees every key and value string stored in `names` with `allocator`, then
/// releases the map itself.
pub fn deinitExpressionAttributeNames(names: *std.StringHashMap([]const u8), allocator: std.mem.Allocator) void {
    var entries = names.iterator();
    while (entries.next()) |kv| {
        const placeholder = kv.key_ptr.*;
        const resolved = kv.value_ptr.*;
        allocator.free(placeholder);
        allocator.free(resolved);
    }
    names.deinit();
}
|
|
|
|
/// Frees every key string in `values` with `allocator`, releases every stored
/// attribute value through `json_module.deinitAttributeValue`, then releases
/// the map itself.
pub fn deinitExpressionAttributeValues(values: *std.StringHashMap(types.AttributeValue), allocator: std.mem.Allocator) void {
    var entries = values.iterator();
    while (entries.next()) |kv| {
        const placeholder = kv.key_ptr.*;
        allocator.free(placeholder);
        json_module.deinitAttributeValue(kv.value_ptr, allocator);
    }
    values.deinit();
}
|
|
|
|
// ============================================================================
|
|
// Tests
|
|
// ============================================================================
|
|
|
|
test "tokenizer basic" {
    // A partition key equality followed by a sort key comparison.
    var tokenizer = Tokenizer.init("pk = :pk AND sk > :sk");

    const expected = [_][]const u8{ "pk", "=", ":pk", "AND", "sk", ">", ":sk" };
    for (expected) |want| {
        try std.testing.expectEqualStrings(want, tokenizer.nextToken().?);
    }

    // Input exhausted.
    try std.testing.expect(tokenizer.nextToken() == null);
}
|
|
|
|
test "tokenizer begins_with" {
    // Parentheses and the comma must come out as separate tokens.
    var tokenizer = Tokenizer.init("pk = :pk AND begins_with(sk, :prefix)");

    const expected = [_][]const u8{
        "pk", "=", ":pk", "AND", "begins_with", "(", "sk", ",", ":prefix", ")",
    };
    for (expected) |want| {
        try std.testing.expectEqualStrings(want, tokenizer.nextToken().?);
    }

    // Input exhausted.
    try std.testing.expect(tokenizer.nextToken() == null);
}
|