Files
zyna-db/src/dynamodb/expression.zig

492 lines
17 KiB
Zig
Raw Normal View History

//! DynamoDB Expression Parser
//! Parses KeyConditionExpression, FilterExpression, ProjectionExpression, etc.
//! Replaces the temporary string-search hack with proper expression parsing.
const std = @import("std");
const types = @import("types.zig");
const json_module = @import("json.zig");
// ============================================================================
// Key Condition Expression Parsing
// ============================================================================
/// Parsed key condition for Query operations.
///
/// `deinit` releases `pk_value` and the sort key condition's values. It does
/// not free `pk_name`; see the field comment for the resulting lifetime rule.
pub const KeyCondition = struct {
/// Partition key attribute name (from ExpressionAttributeNames or direct).
/// Borrowed, never freed by `deinit`: when built by
/// `parseKeyConditionExpression` this is a slice of either the expression
/// string or a value stored in the ExpressionAttributeNames map, so it must
/// not be used after that backing memory is released.
pk_name: []const u8,
/// Partition key value (owned)
pk_value: types.AttributeValue,
/// Sort key condition (optional); null when the expression only constrains
/// the partition key.
sk_condition: ?SortKeyCondition,
/// Release the owned attribute values (partition key value and, if present,
/// the sort key condition's values) using `allocator`.
pub fn deinit(self: *KeyCondition, allocator: std.mem.Allocator) void {
json_module.deinitAttributeValue(&self.pk_value, allocator);
if (self.sk_condition) |*sk| {
sk.deinit(allocator);
}
}
/// Get the raw partition key value bytes (for building storage keys).
/// Returns the payload of an `.S`, `.N` or `.B` value without copying it;
/// any other variant yields `error.InvalidKeyType`.
pub fn getPkBytes(self: *const KeyCondition) ![]const u8 {
return switch (self.pk_value) {
.S => |s| s,
.N => |n| n,
.B => |b| b,
else => error.InvalidKeyType,
};
}
};
/// Sort key condition operators.
///
/// The comparison members and `BETWEEN` are produced by `parseOperator`;
/// `BEGINS_WITH` is set directly by `parseBeginsWith`. The tokenizer also
/// emits `<>`, but there is no member for it, so `parseOperator` returns null
/// for that token.
pub const SortKeyOperator = enum {
EQ, // =
LT, // <
LE, // <=
GT, // >
GE, // >=
BETWEEN, // BETWEEN x AND y
BEGINS_WITH, // begins_with(sk, prefix)
};
/// Parsed sort key condition.
///
/// `deinit` releases `value` and `value2`; it does not free `sk_name`.
pub const SortKeyCondition = struct {
/// Sort key attribute name.
/// Borrowed, never freed by `deinit`: the parsers in this file set it to a
/// slice of the expression string or of an ExpressionAttributeNames value.
sk_name: []const u8,
/// Comparison operator
operator: SortKeyOperator,
/// Primary value (or lower bound for BETWEEN) - owned
value: types.AttributeValue,
/// Upper bound for BETWEEN operator - owned; null for every other operator
value2: ?types.AttributeValue,
/// Release the owned attribute value(s) using `allocator`.
pub fn deinit(self: *SortKeyCondition, allocator: std.mem.Allocator) void {
json_module.deinitAttributeValue(&self.value, allocator);
if (self.value2) |*v2| {
json_module.deinitAttributeValue(v2, allocator);
}
}
};
/// Parse a KeyConditionExpression with ExpressionAttributeNames and ExpressionAttributeValues.
/// Returns owned KeyCondition - caller must call deinit().
///
/// Supported formats:
/// - "pk = :pk"
/// - "#pk = :pk"
/// - "pk = :pk AND sk = :sk"
/// - "pk = :pk AND sk > :sk"
/// - "pk = :pk AND sk BETWEEN :sk1 AND :sk2"
/// - "pk = :pk AND begins_with(sk, :prefix)"
///
/// Errors:
/// - `error.InvalidExpression`: the expression does not match one of the
///   formats above, a `#name` placeholder cannot be resolved, or extra tokens
///   follow a complete sort key condition.
/// - `error.MissingAttributeValue`: a `:value` placeholder is not present in
///   `attribute_values`.
/// - Any error returned while copying an attribute value.
///
/// Lifetime: attribute values are deep-copied into the result, but the
/// attribute names are NOT copied. `pk_name` / `sk_name` are slices of
/// `expression` or of a value stored in `attribute_names`, so the result must
/// not outlive those.
pub fn parseKeyConditionExpression(
    allocator: std.mem.Allocator,
    expression: []const u8,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !KeyCondition {
    var tokenizer = Tokenizer.init(expression);

    // Partition key condition: pk_name = :pk_value
    const pk_name_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    const pk_name = resolveAttributeName(pk_name_token, attribute_names) orelse return error.InvalidExpression;

    const eq_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (!std.mem.eql(u8, eq_token, "=")) return error.InvalidExpression;

    const pk_value_token = tokenizer.nextToken() orelse return error.InvalidExpression;
    var pk_value = try resolveAttributeValue(allocator, pk_value_token, attribute_values);
    errdefer json_module.deinitAttributeValue(&pk_value, allocator);

    // Optional "AND <sort key condition>"
    var sk_condition: ?SortKeyCondition = null;
    // Needed because the trailing-token check below can fail after the sort
    // key condition has already been built.
    errdefer if (sk_condition) |*sk| sk.deinit(allocator);

    if (tokenizer.nextToken()) |and_token| {
        if (!std.ascii.eqlIgnoreCase(and_token, "AND")) {
            return error.InvalidExpression;
        }
        sk_condition = try parseSortKeyCondition(allocator, &tokenizer, attribute_names, attribute_values);

        // A key condition has at most one partition key clause and one sort
        // key clause. Anything left over (e.g. a third "AND ..." clause) used
        // to be dropped silently, which would run a different query than the
        // one requested; reject it instead.
        if (tokenizer.nextToken() != null) return error.InvalidExpression;
    }

    return KeyCondition{
        .pk_name = pk_name,
        .pk_value = pk_value,
        .sk_condition = sk_condition,
    };
}
/// Parse the sort key clause that follows "AND" in a key condition.
///
/// Accepts either `begins_with(name, :value)` (delegated to `parseBeginsWith`)
/// or `name <op> :value`, where `<op>` is anything `parseOperator` recognises.
/// For BETWEEN a second `AND :value` pair is consumed as the upper bound.
/// Returns `error.InvalidExpression` on any missing or unexpected token.
fn parseSortKeyCondition(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    const lead = tokenizer.nextToken() orelse return error.InvalidExpression;

    // Function form: begins_with(sk, :value)
    if (std.ascii.eqlIgnoreCase(lead, "begins_with")) {
        return try parseBeginsWith(allocator, tokenizer, attribute_names, attribute_values);
    }

    // Infix form: sk_name operator :value
    const sk_name = resolveAttributeName(lead, attribute_names) orelse return error.InvalidExpression;

    const operator = op: {
        const op_text = tokenizer.nextToken() orelse return error.InvalidExpression;
        break :op parseOperator(op_text) orelse return error.InvalidExpression;
    };

    const lower_text = tokenizer.nextToken() orelse return error.InvalidExpression;
    var lower = try resolveAttributeValue(allocator, lower_text, attribute_values);
    errdefer json_module.deinitAttributeValue(&lower, allocator);

    // Only BETWEEN carries a second operand: "... AND :upper"
    const upper: ?types.AttributeValue = if (operator == .BETWEEN) bound: {
        const joiner = tokenizer.nextToken() orelse return error.InvalidExpression;
        if (!std.ascii.eqlIgnoreCase(joiner, "AND")) {
            return error.InvalidExpression;
        }
        const upper_text = tokenizer.nextToken() orelse return error.InvalidExpression;
        break :bound try resolveAttributeValue(allocator, upper_text, attribute_values);
    } else null;

    return SortKeyCondition{
        .sk_name = sk_name,
        .operator = operator,
        .value = lower,
        .value2 = upper,
    };
}
/// Parse the argument list of `begins_with`, i.e. `( sk_name , :value )`.
///
/// The `begins_with` keyword itself has already been consumed by the caller.
/// Returns a condition with operator `.BEGINS_WITH` and no second value, or
/// `error.InvalidExpression` if any punctuation or operand is missing.
fn parseBeginsWith(
    allocator: std.mem.Allocator,
    tokenizer: *Tokenizer,
    attribute_names: ?std.StringHashMap([]const u8),
    attribute_values: std.StringHashMap(types.AttributeValue),
) !SortKeyCondition {
    // "("
    const open = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (open.len != 1 or open[0] != '(') return error.InvalidExpression;

    // attribute name (direct or #placeholder)
    const name_text = tokenizer.nextToken() orelse return error.InvalidExpression;
    const sk_name = resolveAttributeName(name_text, attribute_names) orelse return error.InvalidExpression;

    // ","
    const separator = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (separator.len != 1 or separator[0] != ',') return error.InvalidExpression;

    // ":prefix" placeholder, copied so the result owns it
    const prefix_text = tokenizer.nextToken() orelse return error.InvalidExpression;
    var prefix = try resolveAttributeValue(allocator, prefix_text, attribute_values);
    errdefer json_module.deinitAttributeValue(&prefix, allocator);

    // ")"
    const close = tokenizer.nextToken() orelse return error.InvalidExpression;
    if (close.len != 1 or close[0] != ')') return error.InvalidExpression;

    return SortKeyCondition{
        .sk_name = sk_name,
        .operator = .BEGINS_WITH,
        .value = prefix,
        .value2 = null,
    };
}
/// Map an operator token to its `SortKeyOperator`.
///
/// Symbolic operators are matched exactly; the BETWEEN keyword is matched
/// case-insensitively. Returns null for anything else.
fn parseOperator(token: []const u8) ?SortKeyOperator {
    const Symbol = struct { text: []const u8, op: SortKeyOperator };
    const symbols = [_]Symbol{
        .{ .text = "=", .op = .EQ },
        .{ .text = "<", .op = .LT },
        .{ .text = "<=", .op = .LE },
        .{ .text = ">", .op = .GT },
        .{ .text = ">=", .op = .GE },
    };

    for (symbols) |symbol| {
        if (std.mem.eql(u8, token, symbol.text)) return symbol.op;
    }

    if (std.ascii.eqlIgnoreCase(token, "BETWEEN")) return .BETWEEN;
    return null;
}
/// Turn a name token into the attribute name it refers to.
///
/// A token starting with '#' is looked up (including the '#') in `names`;
/// the result is null when `names` is null or has no such entry. Any other
/// token is returned unchanged. The returned slice is never a copy.
fn resolveAttributeName(token: []const u8, names: ?std.StringHashMap([]const u8)) ?[]const u8 {
    const is_placeholder = token.len > 0 and token[0] == '#';
    if (!is_placeholder) return token;

    const lookup = names orelse return null;
    return lookup.get(token);
}
/// Resolve a `:placeholder` token to a deep copy of its attribute value.
///
/// Returns `error.InvalidExpression` when the token does not start with ':'
/// and `error.MissingAttributeValue` when `values` has no entry for it. The
/// copy is made with `allocator` and is owned by the caller.
fn resolveAttributeValue(
    allocator: std.mem.Allocator,
    token: []const u8,
    values: std.StringHashMap(types.AttributeValue),
) !types.AttributeValue {
    // Only placeholders are accepted here; literals are not supported.
    if (token.len == 0 or token[0] != ':') return error.InvalidExpression;

    const stored = values.get(token) orelse return error.MissingAttributeValue;
    return try json_module.deepCopyAttributeValue(allocator, stored);
}
// ============================================================================
// Request Parsing Helpers
// ============================================================================
/// Parse ExpressionAttributeNames from request body.
///
/// Returns null if the body is not valid JSON, is not a JSON object, or has no
/// object-valued "ExpressionAttributeNames" member. Entries whose value is not
/// a JSON string are skipped.
///
/// On success the map owns a copy of every key and value (allocated with
/// `allocator`); release it with `deinitExpressionAttributeNames`.
/// Allocation failure is reported as `error.OutOfMemory` rather than being
/// folded into the "not present" (null) result.
pub fn parseExpressionAttributeNames(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !?std.StringHashMap([]const u8) {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        // Running out of memory is not the same as "no names supplied".
        error.OutOfMemory => return error.OutOfMemory,
        else => return null,
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return null,
    };
    const names_val = root.get("ExpressionAttributeNames") orelse return null;
    const names_obj = switch (names_val) {
        .object => |o| o,
        else => return null,
    };

    var result = std.StringHashMap([]const u8).init(allocator);
    errdefer {
        var owned = result.iterator();
        while (owned.next()) |entry| {
            allocator.free(entry.key_ptr.*);
            allocator.free(entry.value_ptr.*);
        }
        result.deinit();
    }

    var iter = names_obj.iterator();
    while (iter.next()) |entry| {
        // Check the type before allocating so skipped entries cost nothing.
        const name = switch (entry.value_ptr.*) {
            .string => |s| s,
            else => continue,
        };

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);
        const value = try allocator.dupe(u8, name);
        // Without this the copied value leaked when `put` failed.
        errdefer allocator.free(value);

        try result.put(key, value);
    }
    return result;
}
/// Parse ExpressionAttributeValues from request body.
///
/// Returns an owned map - caller must free it with
/// `deinitExpressionAttributeValues`. The map is empty when the body is not
/// valid JSON, is not a JSON object, or has no object-valued
/// "ExpressionAttributeValues" member. Entries whose value cannot be converted
/// by `json_module.parseAttributeValue` are skipped.
///
/// Allocation failure while parsing the JSON or building the map is reported
/// as `error.OutOfMemory`.
pub fn parseExpressionAttributeValues(
    allocator: std.mem.Allocator,
    request_body: []const u8,
) !std.StringHashMap(types.AttributeValue) {
    const parsed = std.json.parseFromSlice(std.json.Value, allocator, request_body, .{}) catch |err| switch (err) {
        // Running out of memory is not the same as "no values supplied".
        error.OutOfMemory => return error.OutOfMemory,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };
    defer parsed.deinit();

    const root = switch (parsed.value) {
        .object => |o| o,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };
    const values_val = root.get("ExpressionAttributeValues") orelse
        return std.StringHashMap(types.AttributeValue).init(allocator);
    const values_obj = switch (values_val) {
        .object => |o| o,
        else => return std.StringHashMap(types.AttributeValue).init(allocator),
    };

    var result = std.StringHashMap(types.AttributeValue).init(allocator);
    errdefer {
        var owned = result.iterator();
        while (owned.next()) |entry| {
            allocator.free(entry.key_ptr.*);
            json_module.deinitAttributeValue(entry.value_ptr, allocator);
        }
        result.deinit();
    }

    var iter = values_obj.iterator();
    while (iter.next()) |entry| {
        // Convert the value BEFORE copying the key: previously the key was
        // duplicated first and then leaked whenever the value was skipped,
        // because `errdefer` does not run on `continue`.
        // NOTE(review): `catch continue` also hides allocation failures from
        // parseAttributeValue; narrow it once that function's error set is
        // confirmed.
        var value = json_module.parseAttributeValue(allocator, entry.value_ptr.*) catch continue;
        errdefer json_module.deinitAttributeValue(&value, allocator);

        const key = try allocator.dupe(u8, entry.key_ptr.*);
        errdefer allocator.free(key);

        try result.put(key, value);
    }
    return result;
}
/// Extract the KeyConditionExpression string from the request body.
///
/// Scans the raw JSON text without allocating. Returns a slice of
/// `request_body` covering the characters between the quotes of the first
/// `"KeyConditionExpression" : "<value>"` pair, or null when no such pair
/// exists or the string is unterminated.
///
/// The slice is the RAW JSON text: escape sequences such as `\"` are not
/// decoded. Occurrences of the marker that are not followed by `:` and a
/// string (for example the marker used as a value, or a key whose value is
/// not a string) are skipped and the search continues.
pub fn parseKeyConditionExpressionString(
    request_body: []const u8,
) ?[]const u8 {
    // Use a simple search to avoid allocation for this common operation
    const marker = "\"KeyConditionExpression\"";

    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, request_body, search_from, marker)) |marker_idx| {
        search_from = marker_idx + marker.len;
        var pos = search_from;

        // Only whitespace may separate the key from its colon.
        while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
        if (pos >= request_body.len or request_body[pos] != ':') continue;
        pos += 1;

        // Only whitespace may separate the colon from the opening quote;
        // anything else means the value is not a string.
        while (pos < request_body.len and std.ascii.isWhitespace(request_body[pos])) : (pos += 1) {}
        if (pos >= request_body.len or request_body[pos] != '"') continue;
        pos += 1;

        // Find the closing quote. A backslash escapes exactly the next
        // character, so `\\"` ends the string while `\"` does not.
        const value_start = pos;
        while (pos < request_body.len) : (pos += 1) {
            switch (request_body[pos]) {
                '"' => return request_body[value_start..pos],
                '\\' => pos += 1,
                else => {},
            }
        }
        return null; // unterminated string
    }
    return null;
}
/// Convenience function to parse a complete Query key condition straight from
/// the raw request body.
/// Returns owned KeyCondition - caller must call deinit().
/// Returns null when the body contains no KeyConditionExpression string.
///
/// Lifetime: the expression is a slice of `request_body`, and attribute names
/// written directly in the expression end up as slices of it too, so the
/// result must not outlive `request_body`.
///
/// NOTE(review): names resolved through a `#placeholder` are slices of values
/// owned by the temporary attribute-names map, which the `defer` below frees
/// before the caller receives the result. `pk_name` / `sk_name` therefore
/// dangle whenever the expression uses `#name` placeholders. Fixing this needs
/// the KeyCondition to own (copy) its names - confirm and address.
pub fn parseQueryKeyCondition(
allocator: std.mem.Allocator,
request_body: []const u8,
) !?KeyCondition {
// Parse expression string (no allocation; slice of request_body)
const expression = parseKeyConditionExpressionString(request_body) orelse return null;
// Parse attribute names (optional); freed on every exit path
var attr_names = try parseExpressionAttributeNames(allocator, request_body);
defer if (attr_names) |*names| {
deinitExpressionAttributeNames(names, allocator);
};
// Parse attribute values; freed on every exit path (the parser deep-copies
// the values it keeps)
var attr_values = try parseExpressionAttributeValues(allocator, request_body);
defer deinitExpressionAttributeValues(&attr_values, allocator);
return try parseKeyConditionExpression(allocator, expression, attr_names, attr_values);
}
// ============================================================================
// Simple Tokenizer
// ============================================================================
/// Minimal tokenizer for key condition expressions.
///
/// Tokens are slices of `input` (nothing is copied):
/// - punctuation: `(`, `)`, `,`
/// - operators: `<=`, `>=`, `<>`, `=`, `<`, `>`
/// - words: runs of ASCII alphanumerics plus `_`, `:`, `#`, `-`, which covers
///   identifiers, keywords, `:value` placeholders and `#name` placeholders
///
/// Whitespace separates tokens. Any other character is skipped.
const Tokenizer = struct {
    input: []const u8,
    pos: usize,

    /// Create a tokenizer positioned at the start of `input`.
    pub fn init(input: []const u8) Tokenizer {
        return .{ .input = input, .pos = 0 };
    }

    /// Return the next token, or null once the input is exhausted.
    ///
    /// Unknown characters are skipped with a loop rather than by recursing,
    /// so a long run of them (the expression comes from the request) cannot
    /// exhaust the stack.
    pub fn nextToken(self: *Tokenizer) ?[]const u8 {
        while (true) {
            // Skip whitespace
            while (self.pos < self.input.len and std.ascii.isWhitespace(self.input[self.pos])) {
                self.pos += 1;
            }
            if (self.pos >= self.input.len) return null;

            const start = self.pos;
            const c = self.input[start];

            // Single-character punctuation
            if (c == '(' or c == ')' or c == ',') {
                self.pos += 1;
                return self.input[start..self.pos];
            }

            // Two-character operators (checked before the one-character ones
            // so that "<=" is not split into "<" and "=")
            if (self.pos + 1 < self.input.len) {
                const two = self.input[self.pos .. self.pos + 2];
                if (std.mem.eql(u8, two, "<=") or std.mem.eql(u8, two, ">=") or std.mem.eql(u8, two, "<>")) {
                    self.pos += 2;
                    return two;
                }
            }

            // Single-character operators
            if (c == '=' or c == '<' or c == '>') {
                self.pos += 1;
                return self.input[start..self.pos];
            }

            // Identifier or keyword (includes :placeholder and #name)
            while (self.pos < self.input.len) {
                const ch = self.input[self.pos];
                if (std.ascii.isAlphanumeric(ch) or ch == '_' or ch == ':' or ch == '#' or ch == '-') {
                    self.pos += 1;
                } else {
                    break;
                }
            }
            if (self.pos > start) {
                return self.input[start..self.pos];
            }

            // Unknown character: skip it and scan again.
            self.pos += 1;
        }
    }
};
// ============================================================================
// Helpers for freeing parsed expression data
// ============================================================================
/// Free a map produced by `parseExpressionAttributeNames`.
///
/// Releases every key and value slice with `allocator`, then the map's own
/// storage. The map must not be used afterwards.
pub fn deinitExpressionAttributeNames(names: *std.StringHashMap([]const u8), allocator: std.mem.Allocator) void {
    var entries = names.iterator();
    while (entries.next()) |kv| {
        const placeholder = kv.key_ptr.*;
        const resolved = kv.value_ptr.*;
        allocator.free(placeholder);
        allocator.free(resolved);
    }
    names.deinit();
}
/// Free a map produced by `parseExpressionAttributeValues`.
///
/// Releases every key slice with `allocator`, hands every value to
/// `json_module.deinitAttributeValue`, then frees the map's own storage. The
/// map must not be used afterwards.
pub fn deinitExpressionAttributeValues(values: *std.StringHashMap(types.AttributeValue), allocator: std.mem.Allocator) void {
    var entries = values.iterator();
    while (entries.next()) |kv| {
        const placeholder = kv.key_ptr.*;
        allocator.free(placeholder);
        json_module.deinitAttributeValue(kv.value_ptr, allocator);
    }
    values.deinit();
}
// ============================================================================
// Tests
// ============================================================================
test "tokenizer basic" {
    // A partition key equality plus a sort key comparison splits into seven
    // whitespace-separated tokens and then runs out.
    var t = Tokenizer.init("pk = :pk AND sk > :sk");
    const expected = [_][]const u8{ "pk", "=", ":pk", "AND", "sk", ">", ":sk" };
    for (expected) |want| {
        try std.testing.expectEqualStrings(want, t.nextToken().?);
    }
    try std.testing.expect(t.nextToken() == null);
}
test "tokenizer begins_with" {
    // Parentheses and the comma are tokens of their own even without
    // surrounding whitespace.
    var t = Tokenizer.init("pk = :pk AND begins_with(sk, :prefix)");
    const expected = [_][]const u8{
        "pk",          "=", ":pk", "AND",
        "begins_with", "(", "sk",  ",",
        ":prefix",     ")",
    };
    for (expected) |want| {
        try std.testing.expectEqualStrings(want, t.nextToken().?);
    }
    try std.testing.expect(t.nextToken() == null);
}