Files
jormun-db/dynamodb/item_codec.odin

536 lines
11 KiB
Odin
Raw Permalink Normal View History

// Binary TLV (Type-Length-Value) encoding for DynamoDB items.
// Replaces JSON storage with an efficient binary format.
// Format: [attribute_count:varint], then for each attribute
//         [name_len:varint][name][type_tag:u8][value...]
package dynamodb
2026-02-15 12:42:55 -05:00
import "core:bytes"
import "core:slice"
// One-byte tags written in front of every encoded attribute value.
// The trailing comment on each member is the DynamoDB type descriptor
// it stands for.
Type_Tag :: enum u8 {
	// Scalars
	String     = 0x01, // S
	Number     = 0x02, // N (stored as string)
	Binary     = 0x03, // B (base64 string)
	Boolean    = 0x04, // BOOL
	Null       = 0x05, // NULL

	// Sets
	String_Set = 0x10, // SS
	Number_Set = 0x11, // NS
	Binary_Set = 0x12, // BS

	// Containers
	List       = 0x20, // L
	Map        = 0x21, // M
}
// ============================================================================
// Encoding (Item → Binary)
// ============================================================================
// Encode an Item to binary TLV format.
// Format: [attribute_count:varint][attributes...]
// Each attribute: [name_len:varint][name:bytes][type_tag:u8][value_encoded:bytes]
//
// Attributes are written in ascending key order, so equal items always
// produce identical bytes.
//
// Returns the encoded bytes and true on success, or nil and false if any
// attribute value fails to encode. The returned slice is a fresh allocation
// from context.allocator that the caller owns (release it with `delete`).
encode :: proc(item: Item) -> ([]byte, bool) {
	buf: bytes.Buffer
	bytes.buffer_init_allocator(&buf, 0, 1024, context.allocator)
	// The scratch buffer is released on every exit path, so nothing returned
	// from this proc may alias its storage.
	defer bytes.buffer_destroy(&buf)

	// Write attribute count
	encode_varint(&buf, len(item))

	// Collect and sort keys for deterministic encoding
	keys := make([dynamic]string, context.temp_allocator)
	for key in item {
		append(&keys, key)
	}
	slice.sort_by(keys[:], proc(a, b: string) -> bool {
		return a < b
	})

	// Encode each attribute as name followed by tagged value
	for key in keys {
		encode_varint(&buf, len(key))
		bytes.buffer_write_string(&buf, key)

		if !encode_attribute_value(&buf, item[key]) {
			return nil, false
		}
	}

	// buffer_to_bytes only returns a view into buf's backing array, which the
	// deferred buffer_destroy frees before the caller ever sees it. Hand back
	// an independent copy instead of a dangling slice.
	return slice.clone(bytes.buffer_to_bytes(&buf)), true
}
// Encode an AttributeValue to binary format, appending to buf.
//
// Every value starts with its one-byte Type_Tag, followed by:
//   String / Number / Binary : [len:varint][bytes]
//   Bool                     : one byte, 1 or 0
//   Null                     : nothing
//   String/Number/Binary set : [count:varint] then count x [len:varint][bytes]
//   List                     : [count:varint] then count encoded values
//   Map                      : [count:varint] then count x [key_len:varint][key][value],
//                              keys in ascending order
//
// Returns false if attr (or any nested value) is nil, i.e. holds no variant;
// true otherwise. On failure buf may already contain a partial encoding.
encode_attribute_value :: proc(buf: ^bytes.Buffer, attr: Attribute_Value) -> bool {
	// An unset union matches none of the cases below. Without this guard it
	// would write zero bytes and still report success, leaving a stream the
	// decoder cannot read back (it expects a type tag for every value).
	if attr == nil {
		return false
	}

	switch v in attr {
	case String:
		bytes.buffer_write_byte(buf, u8(Type_Tag.String))
		encode_varint(buf, len(v))
		bytes.buffer_write_string(buf, string(v))

	case DDB_Number:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Number))
		// Numbers are stored in their string form.
		// NOTE(review): if format_ddb_number allocates its result, that
		// string is not freed here - confirm against its definition.
		num_str := format_ddb_number(v)
		encode_varint(buf, len(num_str))
		bytes.buffer_write_string(buf, num_str)

	case Binary:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Binary))
		encode_varint(buf, len(v))
		bytes.buffer_write_string(buf, string(v))

	case Bool:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Boolean))
		bytes.buffer_write_byte(buf, 1 if bool(v) else 0)

	case Null:
		// NULL is fully described by its tag; it has no value bytes
		bytes.buffer_write_byte(buf, u8(Type_Tag.Null))

	case DDB_Number_Set:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Number_Set))
		encode_varint(buf, len(v))
		for num in v {
			num_str := format_ddb_number(num)
			encode_varint(buf, len(num_str))
			bytes.buffer_write_string(buf, num_str)
		}

	case String_Set:
		bytes.buffer_write_byte(buf, u8(Type_Tag.String_Set))
		encode_varint(buf, len(v))
		for s in v {
			encode_varint(buf, len(s))
			bytes.buffer_write_string(buf, s)
		}

	case Binary_Set:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Binary_Set))
		encode_varint(buf, len(v))
		for b in v {
			encode_varint(buf, len(b))
			bytes.buffer_write_string(buf, b)
		}

	case List:
		bytes.buffer_write_byte(buf, u8(Type_Tag.List))
		encode_varint(buf, len(v))
		for element in v {
			if !encode_attribute_value(buf, element) {
				return false
			}
		}

	case Map:
		bytes.buffer_write_byte(buf, u8(Type_Tag.Map))
		encode_varint(buf, len(v))

		// Collect and sort keys for deterministic encoding
		keys := make([dynamic]string, context.temp_allocator)
		for key in v {
			append(&keys, key)
		}
		slice.sort_by(keys[:], proc(a, b: string) -> bool {
			return a < b
		})

		// Encode each map entry as key followed by tagged value
		for key in keys {
			encode_varint(buf, len(key))
			bytes.buffer_write_string(buf, key)
			if !encode_attribute_value(buf, v[key]) {
				return false
			}
		}
	}

	return true
}
// ============================================================================
// Decoding (Binary → Item)
// ============================================================================
// Binary decoder helper: a read cursor over an encoded byte slice.
// The decoder_read_* procs in this file take a pointer to it and advance
// `pos` as they consume input.
Binary_Decoder :: struct {
// Encoded input. decoder_read_bytes returns sub-slices of it, not copies.
data: []byte,
// Index into `data` of the next unread byte.
pos: int,
}
// Consume a single byte from the decoder.
// Returns (byte, true) and moves the cursor forward by one, or (0, false)
// with the cursor untouched once the input is exhausted.
decoder_read_byte :: proc(decoder: ^Binary_Decoder) -> (u8, bool) {
	if decoder.pos < len(decoder.data) {
		value := decoder.data[decoder.pos]
		decoder.pos += 1
		return value, true
	}
	return 0, false
}
// Consume exactly `length` bytes from the decoder.
//
// Returns a sub-slice of decoder.data (not a copy) and true, advancing the
// cursor past it. Returns (nil, false) and leaves the cursor untouched when
// `length` is negative or larger than the number of unread bytes.
decoder_read_bytes :: proc(decoder: ^Binary_Decoder, length: int) -> ([]byte, bool) {
	// `length` comes straight from the input. Comparing it with the unread
	// byte count (rather than testing pos + length) cannot overflow, and the
	// explicit sign check stops a negative length from slipping through and
	// producing an out-of-range slice expression.
	remaining := len(decoder.data) - decoder.pos
	if length < 0 || length > remaining {
		return nil, false
	}

	start := decoder.pos
	decoder.pos += length
	return decoder.data[start:decoder.pos], true
}
// Consume one varint: 7 payload bits per byte, least-significant group
// first, high bit set on every byte except the last.
//
// Returns (value, true) on success. Returns (0, false) when the input ends
// before the final byte, when the encoding carries more than 63 payload bits,
// or when the value does not fit in a non-negative int. Bytes examined before
// a failure stay consumed.
decoder_read_varint :: proc(decoder: ^Binary_Decoder) -> (int, bool) {
	// Accumulate unsigned so that no shift can land in a sign bit.
	result: u64 = 0
	shift: uint = 0

	for decoder.pos < len(decoder.data) {
		chunk := decoder.data[decoder.pos]
		decoder.pos += 1

		result |= u64(chunk & 0x7F) << shift

		if (chunk & 0x80) == 0 {
			// Every varint in this format is a length or a count, so a
			// value outside the non-negative int range is corrupt input.
			if result > u64(max(int)) {
				return 0, false
			}
			return int(result), true
		}

		shift += 7
		// Nine groups already carry 63 bits. A tenth group would be shifted
		// by 63, silently dropping bits and possibly setting the sign bit.
		if shift >= 63 {
			return 0, false // Varint overflow
		}
	}

	return 0, false // Unexpected end of data
}
// Decode binary TLV format back into an Item.
//
// Expects [attribute_count:varint] followed by that many
// [name_len:varint][name][encoded value] records.
//
// Returns (item, true) on success; attribute names are cloned, so the item
// does not alias `data`. Returns ({}, false) on truncated or malformed input,
// including a repeated attribute name, after releasing everything decoded so
// far via item_destroy.
decode :: proc(data: []byte) -> (Item, bool) {
	decoder := Binary_Decoder{data = data, pos = 0}

	attr_count, count_ok := decoder_read_varint(&decoder)
	if !count_ok {
		return {}, false
	}

	item := make(Item)
	for _ in 0..<attr_count {
		// Read attribute name
		name_len, name_len_ok := decoder_read_varint(&decoder)
		if !name_len_ok {
			item_destroy(&item)
			return {}, false
		}

		name_bytes, name_ok := decoder_read_bytes(&decoder, name_len)
		if !name_ok {
			item_destroy(&item)
			return {}, false
		}

		// Assigning to a key that already exists would keep the old key and
		// drop the old value without freeing it, leaking both the freshly
		// cloned name and the displaced value. The encoder never emits a
		// name twice, so a repeat means the input is corrupt.
		if (string(name_bytes) in item) {
			item_destroy(&item)
			return {}, false
		}

		// The item must own its keys; name_bytes still points into `data`
		owned_name := string(slice.clone(name_bytes))

		// Read attribute value
		value, value_ok := decode_attribute_value(&decoder)
		if !value_ok {
			delete(owned_name)
			item_destroy(&item)
			return {}, false
		}

		item[owned_name] = value
	}

	return item, true
}
// Read one length-prefixed byte run: [len:varint][bytes].
// The returned slice points into decoder.data; clone it before keeping it.
@(private = "file")
decoder_read_prefixed_bytes :: proc(decoder: ^Binary_Decoder) -> ([]byte, bool) {
	length, length_ok := decoder_read_varint(decoder)
	if !length_ok {
		return nil, false
	}
	return decoder_read_bytes(decoder, length)
}

// Read an element count and reject values the remaining input cannot hold.
// Every set element, list element and map entry occupies at least one byte,
// so a count that is negative or exceeds the unread byte count is corrupt.
// Checking before allocating keeps a bad count from requesting a huge slice.
@(private = "file")
decoder_read_count :: proc(decoder: ^Binary_Decoder) -> (int, bool) {
	count, count_ok := decoder_read_varint(decoder)
	if !count_ok {
		return 0, false
	}
	if count < 0 || count > len(decoder.data) - decoder.pos {
		return 0, false
	}
	return count, true
}

// Read [count:varint] followed by `count` length-prefixed strings.
// Each string is cloned; on failure everything allocated here is freed.
@(private = "file")
decoder_read_string_list :: proc(decoder: ^Binary_Decoder) -> ([]string, bool) {
	count, count_ok := decoder_read_count(decoder)
	if !count_ok {
		return nil, false
	}

	list := make([]string, count)
	for i in 0..<count {
		data, data_ok := decoder_read_prefixed_bytes(decoder)
		if !data_ok {
			// Only the first i slots hold clones
			for j in 0..<i {
				delete(list[j])
			}
			delete(list)
			return nil, false
		}
		list[i] = string(slice.clone(data))
	}
	return list, true
}

// Release the keys, values and storage of a partially decoded map.
@(private = "file")
decoder_discard_map :: proc(entries: map[string]Attribute_Value) {
	for key, value in entries {
		delete(key)
		// Iteration values are not addressable; destroy through a copy
		doomed := value
		attr_value_destroy(&doomed)
	}
	delete(entries)
}

// Decode an AttributeValue from binary format.
//
// Reads a one-byte Type_Tag and then the payload that tag implies, recursing
// for List and Map. Strings, binaries and map keys are cloned, so the result
// does not alias decoder.data.
//
// Returns (value, true) on success. Returns (nil, false) on truncated input,
// an unknown tag, an unparsable number, an implausible element count, or a
// repeated map key; anything allocated for the failed value is freed first.
decode_attribute_value :: proc(decoder: ^Binary_Decoder) -> (Attribute_Value, bool) {
	type_byte, type_ok := decoder_read_byte(decoder)
	if !type_ok {
		return nil, false
	}

	type_tag := Type_Tag(type_byte)

	switch type_tag {
	case .String:
		data, data_ok := decoder_read_prefixed_bytes(decoder)
		if !data_ok {
			return nil, false
		}
		return String(string(slice.clone(data))), true

	case .Number:
		data, data_ok := decoder_read_prefixed_bytes(decoder)
		if !data_ok {
			return nil, false
		}
		// Numbers are stored as text; parse back into DDB_Number
		ddb_num, num_ok := parse_ddb_number(string(data))
		if !num_ok {
			return nil, false
		}
		return ddb_num, true

	case .Binary:
		data, data_ok := decoder_read_prefixed_bytes(decoder)
		if !data_ok {
			return nil, false
		}
		return Binary(string(slice.clone(data))), true

	case .Boolean:
		flag, flag_ok := decoder_read_byte(decoder)
		if !flag_ok {
			return nil, false
		}
		return Bool(flag != 0), true

	case .Null:
		return Null(true), true

	case .String_Set:
		strs, strs_ok := decoder_read_string_list(decoder)
		if !strs_ok {
			return nil, false
		}
		return String_Set(strs), true

	case .Number_Set:
		count, count_ok := decoder_read_count(decoder)
		if !count_ok {
			return nil, false
		}

		numbers := make([]DDB_Number, count)
		for i in 0..<count {
			data, data_ok := decoder_read_prefixed_bytes(decoder)
			if !data_ok {
				// As in the original code, parsed numbers are not freed
				// individually; only the slice itself is released.
				delete(numbers)
				return nil, false
			}
			ddb_num, num_ok := parse_ddb_number(string(data))
			if !num_ok {
				delete(numbers)
				return nil, false
			}
			numbers[i] = ddb_num
		}
		return DDB_Number_Set(numbers), true

	case .Binary_Set:
		binaries, binaries_ok := decoder_read_string_list(decoder)
		if !binaries_ok {
			return nil, false
		}
		return Binary_Set(binaries), true

	case .List:
		count, count_ok := decoder_read_count(decoder)
		if !count_ok {
			return nil, false
		}

		list := make([]Attribute_Value, count)
		for i in 0..<count {
			value, value_ok := decode_attribute_value(decoder)
			if !value_ok {
				// Only the first i slots hold decoded values
				for j in 0..<i {
					attr_value_destroy(&list[j])
				}
				delete(list)
				return nil, false
			}
			list[i] = value
		}
		return List(list), true

	case .Map:
		count, count_ok := decoder_read_count(decoder)
		if !count_ok {
			return nil, false
		}

		attr_map := make(map[string]Attribute_Value)
		for _ in 0..<count {
			// Read key
			key_bytes, key_ok := decoder_read_prefixed_bytes(decoder)
			if !key_ok {
				decoder_discard_map(attr_map)
				return nil, false
			}

			// Overwriting an existing key would leak both the new key clone
			// and the displaced value; the encoder never repeats a key, so
			// treat a repeat as corrupt input.
			if (string(key_bytes) in attr_map) {
				decoder_discard_map(attr_map)
				return nil, false
			}
			owned_key := string(slice.clone(key_bytes))

			// Read value
			value, value_ok := decode_attribute_value(decoder)
			if !value_ok {
				delete(owned_key)
				decoder_discard_map(attr_map)
				return nil, false
			}

			attr_map[owned_key] = value
		}
		return Map(attr_map), true
	}

	// Tag byte does not correspond to any Type_Tag member
	return nil, false
}
// ============================================================================
// Varint Encoding (length and count prefixes)
// ============================================================================
// Append `value` to buf as a varint: 7 payload bits per byte,
// least-significant group first, high bit set on every byte except the last.
//
// Within this file it is only called with len(...) results, which are never
// negative. A negative value is written as its 64-bit two's-complement bit
// pattern rather than being rejected.
encode_varint :: proc(buf: ^bytes.Buffer, value: int) {
	// Shift as unsigned: an arithmetic right shift of a negative int settles
	// at -1 and never reaches 0, which would make this loop run forever.
	remaining := u64(value)

	for remaining >= 0x80 {
		bytes.buffer_write_byte(buf, u8(remaining & 0x7F) | 0x80)
		remaining >>= 7
	}

	// Final group: continuation bit clear
	bytes.buffer_write_byte(buf, u8(remaining))
}