Improve database schema for notes-with-children

This commit is contained in:
Andrew Mulbrook 2026-03-24 22:51:56 -05:00
parent 82c272990c
commit 94d00c94d4
2 changed files with 299 additions and 99 deletions

View file

@ -10,17 +10,30 @@ import argparse
import sqlite3
import sys
import uuid
from typing import NamedTuple
from datetime import datetime, timezone
from pathlib import Path
NOTE_ROOT_ID = uuid.UUID(int=0)
class ObjectInfo(NamedTuple):
    """Immutable description of one stored object, without its data payload.

    Field order is part of the tuple's identity: id, type, created,
    modified, executable, hidden.
    """

    # Identifier of the object.
    id: uuid.UUID
    # Type string recorded for the object.
    type: str
    # When the object was created.
    created: datetime
    # When the object was last modified.
    modified: datetime
    # Whether the object is flagged executable.
    executable: bool
    # Whether the object is flagged hidden.
    hidden: bool
_SCHEMA = """
CREATE TABLE IF NOT EXISTS objects (
id TEXT PRIMARY KEY,
type TEXT NOT NULL CHECK(type IN ('blob', 'tree')),
data BLOB,
modified TEXT NOT NULL
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
data BLOB,
created REAL NOT NULL,
modified REAL NOT NULL,
executable INTEGER NOT NULL DEFAULT 0,
hidden INTEGER NOT NULL DEFAULT 0
);
CREATE TABLE IF NOT EXISTS metadata (
@ -35,12 +48,24 @@ CREATE TABLE IF NOT EXISTS labels
label TEXT PRIMARY KEY,
id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS tree_entries (
parent_id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE,
name TEXT NOT NULL,
child_id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE,
PRIMARY KEY (parent_id, name)
);
"""
type SqlObjectId = str | uuid.UUID
def _now() -> str:
return datetime.now(timezone.utc).isoformat()
def _now() -> float:
"""Current UTC time as a Unix epoch float."""
return datetime.now(timezone.utc).timestamp()
def _to_datetime(ts: float) -> datetime:
"""Convert a Unix epoch float to a UTC datetime."""
return datetime.fromtimestamp(ts, tz=timezone.utc)
def _sql_id(id: SqlObjectId | None) -> str | None:
if id is None:
@ -49,10 +74,6 @@ def _sql_id(id: SqlObjectId | None) -> str | None:
id if isinstance(id, str) else str(id)
)
def _initialize_db(con: sqlite3.Connection) -> None:
    """Run the module's schema script on ``con`` and commit the result."""
    con.executescript(_SCHEMA)
    con.commit()
class Sqlite3Trove:
def __init__(self, con: sqlite3.Connection):
@ -79,7 +100,7 @@ class Sqlite3Trove:
con.commit()
obj = cls(con)
if initialize:
obj.write_tree(b"", NOTE_ROOT_ID)
obj._write_object(b"", "inode/directory", NOTE_ROOT_ID)
return obj
def close(self):
@ -91,6 +112,36 @@ class Sqlite3Trove:
def __exit__(self, *_):
self.close()
# ------------------------------------------------------------------
# Object info
# ------------------------------------------------------------------
def get_info(self, object_id: SqlObjectId) -> ObjectInfo | None:
    """Look up the bookkeeping columns of a single object.

    Every column of the ``objects`` row except ``data`` is read and
    converted to a Python type: the id becomes a ``uuid.UUID``, the two
    timestamps go through ``_to_datetime``, and the flags become ``bool``.

    Returns:
        An ``ObjectInfo`` for the row, or ``None`` when no row has this id.
    """
    cursor = self._con.execute(
        "SELECT id, type, created, modified, executable, hidden "
        "FROM objects WHERE id = ?",
        (_sql_id(object_id),),
    )
    record = cursor.fetchone()
    if record is None:
        return None

    # Columns are addressed by name, so the connection has to return rows
    # that support name-based lookup.
    return ObjectInfo(
        id=uuid.UUID(record["id"]),
        type=record["type"],
        created=_to_datetime(record["created"]),
        modified=_to_datetime(record["modified"]),
        executable=bool(record["executable"]),
        hidden=bool(record["hidden"]),
    )
def is_tree(self, object_id: SqlObjectId) -> bool:
    """Tell whether the object is the parent of at least one tree entry.

    Only ``tree_entries`` is consulted; the object's own type column is
    not looked at, and an id with no entries (or no object at all)
    yields ``False``.
    """
    cursor = self._con.execute(
        "SELECT 1 FROM tree_entries WHERE parent_id = ? LIMIT 1",
        (_sql_id(object_id),),
    )
    return cursor.fetchone() is not None
# ------------------------------------------------------------------
# CRUD operations
# ------------------------------------------------------------------
@ -103,9 +154,9 @@ class Sqlite3Trove:
return row["type"] if row else None
def read_object(self, object_id: SqlObjectId) -> bytes | None:
"""Return raw data for a blob object, or None if not found."""
"""Return raw data for an object, or None if not found."""
row = self._con.execute(
"SELECT data, type FROM objects WHERE id = ?", (_sql_id(object_id),)
"SELECT data FROM objects WHERE id = ?", (_sql_id(object_id),)
).fetchone()
if row is None:
return None
@ -118,7 +169,7 @@ class Sqlite3Trove:
).fetchone()
if row is None:
return None
return datetime.fromisoformat(row["modified"])
return _to_datetime(row["modified"])
def read_metadata(self, object_id: SqlObjectId, key: str) -> bytes | None:
"""Return raw metadata value for (uuid, key), or None if not found."""
@ -130,45 +181,101 @@ class Sqlite3Trove:
return bytes(row["value"]) if row["value"] is not None else b""
def write_metadata(self, object_id: SqlObjectId, key: str, value: bytes) -> None:
    """Store ``value`` under ``key`` for the object, replacing any previous value.

    The write is committed before returning.
    """
    params = (_sql_id(object_id), key, value)
    self._con.execute(
        "INSERT OR REPLACE INTO metadata (id, key, value) VALUES (?, ?, ?)",
        params,
    )
    self._con.commit()
def write_content(self, object_id: SqlObjectId, data: bytes) -> None:
    """Update only the data and modified timestamp. Preserves type and flags.

    This is a plain UPDATE, so it never creates a row: when no object has
    ``object_id`` the statement matches nothing and the call is a no-op.
    The change is committed before returning.

    Args:
        object_id: Id of the object whose content is replaced.
        data: New raw content.
    """
    # A single UPDATE is used instead of INSERT OR REPLACE so that the
    # row's other columns (type, created, executable, hidden) survive.
    self._con.execute(
        "UPDATE objects SET data = ?, modified = ? WHERE id = ?",
        (data, _now(), _sql_id(object_id)),
    )
    self._con.commit()
def _write_object(
    self,
    data: bytes,
    dtype: str,
    object_id: str | uuid.UUID | None = None,
    executable: bool = False,
    hidden: bool = False,
) -> str:
    """Insert an object, or overwrite an existing one, and return its id.

    On insert, ``created`` and ``modified`` are both set to the current
    time. When a row with the same id already exists, its ``created``
    value is kept and type, data, modified, executable and hidden are
    replaced by the values passed here.

    Args:
        data: Raw content to store.
        dtype: Value for the ``type`` column.
        object_id: Id to write to; a fresh ``uuid4`` is used when omitted.
        executable: Value for the executable flag.
        hidden: Value for the hidden flag.

    Returns:
        The id of the written object in its SQL (string) form.
    """
    now = _now()
    if object_id is None:
        object_id = uuid.uuid4()
    sid = _sql_id(object_id)

    # No lookup of the old ``created`` value is needed: the DO UPDATE
    # clause below does not list ``created``, so on a conflict the stored
    # value is left untouched and the one supplied here is only used for
    # a genuinely new row.
    self._con.execute(
        """INSERT INTO objects (id, type, data, created, modified, executable, hidden)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
        type=excluded.type, data=excluded.data, modified=excluded.modified,
        executable=excluded.executable, hidden=excluded.hidden""",
        (sid, dtype, data, now, now, int(executable), int(hidden)),
    )
    self._con.commit()
    return sid
def write_blob(self, data: bytes, object_id: SqlObjectId | None = None,
               dtype: str = "application/octet-stream",
               executable: bool = False, hidden: bool = False) -> str:
    """Insert or replace a blob and return its id.

    Pass ``object_id`` to overwrite an existing object. Everything is
    forwarded to ``_write_object``, so the type and both flags passed here
    (including their defaults) are what gets written.

    Args:
        data: Raw content to store.
        object_id: Id to write to, or ``None`` to let a new one be assigned.
        dtype: Type string to record for the blob.
        executable: Value for the executable flag.
        hidden: Value for the hidden flag.

    Returns:
        The id of the written object.
    """
    return self._write_object(data, dtype, _sql_id(object_id),
                              executable=executable, hidden=hidden)
def write_tree(self, data: bytes, object_id: SqlObjectId | None = None,
               hidden: bool = False) -> str:
    """Write a tree-typed object and return its id.

    The object is stored with type ``"inode/directory"``.

    Args:
        data: Raw content to store on the tree object itself.
        object_id: Id to write to, or ``None`` to let a new one be assigned.
        hidden: Value for the hidden flag.

    Returns:
        The id of the written object.
    """
    return self._write_object(data, "inode/directory", _sql_id(object_id),
                              hidden=hidden)
def set_executable(self, object_id: SqlObjectId, executable: bool) -> None:
    """Turn the object's executable flag on or off.

    The flag is stored as 0/1, the modified timestamp is refreshed in the
    same statement, and the change is committed before returning.
    """
    params = (int(executable), _now(), _sql_id(object_id))
    self._con.execute(
        "UPDATE objects SET executable = ?, modified = ? WHERE id = ?",
        params,
    )
    self._con.commit()
def set_hidden(self, object_id: SqlObjectId, hidden: bool) -> None:
    """Turn the object's hidden flag on or off.

    The flag is stored as 0/1, the modified timestamp is refreshed in the
    same statement, and the change is committed before returning.
    """
    params = (int(hidden), _now(), _sql_id(object_id))
    self._con.execute(
        "UPDATE objects SET hidden = ?, modified = ? WHERE id = ?",
        params,
    )
    self._con.commit()
def set_type(self, object_id: SqlObjectId, dtype: str) -> None:
    """Replace the object's type string.

    The modified timestamp is refreshed in the same statement, and the
    change is committed before returning.
    """
    params = (dtype, _now(), _sql_id(object_id))
    self._con.execute(
        "UPDATE objects SET type = ?, modified = ? WHERE id = ?",
        params,
    )
    self._con.commit()
def delete_object(self, object_id: SqlObjectId) -> bool:
"""
Delete a blob and all its metadata rows.
Delete an object and all its metadata rows.
Returns True if an object was deleted, False if id not found.
Foreign key cascade handles the metadata rows.
Foreign key cascade handles the metadata and tree_entries rows.
"""
cur = self._con.execute(
"DELETE FROM objects WHERE id = ?", (_sql_id(object_id),)
@ -176,6 +283,62 @@ class Sqlite3Trove:
self._con.commit()
return cur.rowcount > 0
# ------------------------------------------------------------------
# Tree entry operations
# ------------------------------------------------------------------
def link(self, parent_id: SqlObjectId, name: str, child_id: SqlObjectId) -> None:
    """Record ``child_id`` as the entry called ``name`` inside ``parent_id``.

    An entry already stored under the same name in that parent is
    overwritten. The parent's modified timestamp is refreshed afterwards,
    and both statements are committed together.

    NOTE(review): the schema declares foreign keys from both id columns to
    ``objects``; whether they are enforced depends on how the connection
    was configured, which is not visible here.
    """
    parent_key = _sql_id(parent_id)
    entry = (parent_key, name, _sql_id(child_id))
    self._con.execute(
        "INSERT OR REPLACE INTO tree_entries (parent_id, name, child_id) "
        "VALUES (?, ?, ?)",
        entry,
    )
    # Changing a tree's entries counts as a modification of the tree.
    self._con.execute(
        "UPDATE objects SET modified = ? WHERE id = ?",
        (_now(), parent_key),
    )
    self._con.commit()
def unlink(self, parent_id: SqlObjectId, name: str) -> bool:
    """Drop the entry called ``name`` from the tree ``parent_id``.

    Only the entry is removed; the object it pointed to stays in the
    store. The parent's modified timestamp is refreshed only when an
    entry was actually removed.

    Returns:
        ``True`` if an entry was removed, ``False`` if there was none.
    """
    parent_key = _sql_id(parent_id)
    cursor = self._con.execute(
        "DELETE FROM tree_entries WHERE parent_id = ? AND name = ?",
        (parent_key, name),
    )
    removed = cursor.rowcount > 0
    if removed:
        self._con.execute(
            "UPDATE objects SET modified = ? WHERE id = ?",
            (_now(), parent_key),
        )
    self._con.commit()
    return removed
def list_tree(self, parent_id: SqlObjectId) -> dict[str, str]:
    """Collect the entries of a tree as a ``{name: child_id}`` mapping.

    Entries are fetched ordered by name, so the returned dict iterates in
    that order. A tree without entries gives an empty dict.
    """
    cursor = self._con.execute(
        "SELECT name, child_id FROM tree_entries "
        "WHERE parent_id = ? ORDER BY name",
        (_sql_id(parent_id),),
    )
    listing: dict[str, str] = {}
    for record in cursor.fetchall():
        listing[record["name"]] = record["child_id"]
    return listing
# ------------------------------------------------------------------
# Label operations
# ------------------------------------------------------------------
def get_label(self, label: str) -> SqlObjectId | None:
"""
Return the ID associated with a label, or None if not found.
@ -228,18 +391,25 @@ def main():
# Create database
subparsers.add_parser("create", help="Create a new database")
# Get blob
get_parser = subparsers.add_parser("get", help="Get a blob object by ID")
get_parser.add_argument("id", help="ID of the blob to retrieve")
# Get object data
get_parser = subparsers.add_parser("get", help="Get object data by ID")
get_parser.add_argument("id", help="ID of the object to retrieve")
# Info
info_parser = subparsers.add_parser("info", help="Show object metadata")
info_parser.add_argument("id", help="Object ID")
# Write blob
write_parser = subparsers.add_parser("write", help="Write data to a blob")
write_parser.add_argument("data", help="Data to write (as string, will be encoded as UTF-8)")
write_parser.add_argument("--id", help="ID of existing blob to update (optional)")
write_parser.add_argument("--type", default="text/plain", help="MIME type (default: text/plain)")
write_parser.add_argument("--executable", action="store_true", help="Mark as executable")
write_parser.add_argument("--hidden", action="store_true", help="Mark as hidden")
# Delete blob
delete_parser = subparsers.add_parser("delete", help="Delete a blob by ID")
delete_parser.add_argument("id", help="ID of the blob to delete")
# Delete object
delete_parser = subparsers.add_parser("delete", help="Delete an object by ID")
delete_parser.add_argument("id", help="ID of the object to delete")
# Set label
setlabel_parser = subparsers.add_parser("setlabel", help="Create or update a label to point to an ID")
@ -253,6 +423,19 @@ def main():
# List labels
subparsers.add_parser("labels", help="List all labels")
# Tree operations
link_parser = subparsers.add_parser("link", help="Link a child into a tree")
link_parser.add_argument("parent_id", help="Parent tree object ID")
link_parser.add_argument("name", help="Entry name")
link_parser.add_argument("child_id", help="Child object ID")
unlink_parser = subparsers.add_parser("unlink", help="Unlink a child from a tree")
unlink_parser.add_argument("parent_id", help="Parent tree object ID")
unlink_parser.add_argument("name", help="Entry name to remove")
ls_parser = subparsers.add_parser("ls", help="List tree entries")
ls_parser.add_argument("parent_id", help="Tree object ID to list")
args = parser.parse_args()
try:
@ -270,14 +453,33 @@ def main():
data = db.read_object(args.id)
db.close()
if data is None:
print(f"Blob not found: {args.id}")
print(f"Object not found: {args.id}")
sys.exit(1)
sys.stdout.buffer.write(data)
case "info":
db = Sqlite3Trove.open(args.database)
obj = db.get_info(args.id)
has_children = db.is_tree(args.id)
db.close()
if obj is None:
print(f"Object not found: {args.id}")
sys.exit(1)
print(f"id: {obj.id}")
print(f"type: {obj.type}")
print(f"created: {obj.created.isoformat()}")
print(f"modified: {obj.modified.isoformat()}")
print(f"executable: {obj.executable}")
print(f"hidden: {obj.hidden}")
print(f"children: {has_children}")
case "write":
data_bytes = args.data.encode("utf-8")
db = Sqlite3Trove.open(args.database)
object_id = db.write_blob(data_bytes, args.id)
object_id = db.write_blob(data_bytes, args.id,
dtype=args.type,
executable=args.executable,
hidden=args.hidden)
db.close()
print(object_id)
@ -286,9 +488,9 @@ def main():
deleted = db.delete_object(args.id)
db.close()
if deleted:
print(f"Deleted blob: {args.id}")
print(f"Deleted: {args.id}")
else:
print(f"Blob not found: {args.id}")
print(f"Object not found: {args.id}")
sys.exit(1)
case "setlabel":
@ -317,6 +519,32 @@ def main():
else:
print("No labels found.")
case "link":
db = Sqlite3Trove.open(args.database)
db.link(args.parent_id, args.name, args.child_id)
db.close()
print(f"Linked '{args.name}' -> {args.child_id} in {args.parent_id}")
case "unlink":
db = Sqlite3Trove.open(args.database)
removed = db.unlink(args.parent_id, args.name)
db.close()
if removed:
print(f"Unlinked '{args.name}' from {args.parent_id}")
else:
print(f"Entry '{args.name}' not found in {args.parent_id}")
sys.exit(1)
case "ls":
db = Sqlite3Trove.open(args.database)
entries = db.list_tree(args.parent_id)
db.close()
if entries:
for name, child_id in entries.items():
print(f"{name}: {child_id}")
else:
print("No entries.")
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
sys.exit(1)

View file

@ -2,16 +2,14 @@
trovedb.py Concrete implementation of Trove protocols backed by Sqlite3Trove.
Implements BlobNote, TreeNote, and Trove protocols defined in trove.py.
Depends on db.py (Sqlite3Trove) and tree.py (Tree) for storage and
tree serialization respectively.
Depends on db.py (Sqlite3Trove) for storage.
"""
from typing import Optional, Self
from typing import Optional
from pathlib import Path
import datetime as dt
from .db import Sqlite3Trove, NOTE_ROOT_ID
from .tree import Tree as TreeData
from . import trove as tr
@ -19,7 +17,7 @@ from .trove import Note, Trove, TreeNote, BlobNote, TreeEntry, NoteNotFound, Obj
class NoteImpl(Note):
"""Concrete not implementation"""
"""Concrete note implementation."""
def __init__(self, parent: 'TroveImpl', object_id: ObjectId):
self._parent = parent
@ -37,13 +35,14 @@ class NoteImpl(Note):
@property
def mtime(self) -> dt.datetime:
    """Modification time of this note, as reported by the store.

    NOTE(review): the store's ``get_mtime`` appears to return ``None`` for
    an id it does not know, which the return annotation does not reflect;
    confirm how callers handle that.
    """
    modified = self._db.get_mtime(self._object_id)
    return modified
@property
def mime(self) -> str:
    """Return the MIME type recorded for this note in the store.

    Falls back to ``"application/octet-stream"`` when the store has no
    information for this note's id.
    """
    # The type is looked up on every access; an unconditional early return
    # of the generic type would make this lookup unreachable.
    info = self._db.get_info(self._object_id)
    return info.type if info else "application/octet-stream"
def get_raw_metadata(self, key: str) -> Optional[bytes]:
    """Fetch the raw metadata value stored under ``key`` for this note.

    The lookup is delegated to the store's ``read_metadata`` and its
    result is returned unchanged.
    """
    raw = self._db.read_metadata(self._object_id, key)
    return raw
@ -61,43 +60,25 @@ class BlobNoteImpl(NoteImpl, BlobNote):
return data if data is not None else b""
def write(self, data: bytes) -> None:
    """Replace this blob's content with ``data``.

    Only the store's ``write_content`` is used, which updates the data and
    modification time and leaves the object's type and flags alone.
    Writing through ``write_blob`` as well would replace the whole row
    with default type and flags, so that call is deliberately absent.
    """
    self._db.write_content(self._object_id, data)
class TreeNoteImpl(NoteImpl, TreeNote):
"""Concrete TreeNote: a tree object in the store with metadata access."""
def _read_tree(self) -> TreeData:
    """Build a ``TreeData`` from this object's stored bytes.

    Missing or empty content is passed to ``TreeData`` as ``None``.
    """
    raw = self._db.read_object(self._object_id)
    return TreeData(raw or None)
def _flush_tree(self, tree: TreeData) -> None:
    """Serialize ``tree`` and write it back to the store under this object's id."""
    payload = tree.serialize()
    self._db.write_tree(payload, self._object_id)
"""Concrete TreeNote: a tree object backed by the tree_entries table."""
# Tree protocol
def link(self, name: str, note: Note) -> None:
    """Link ``name`` in this tree to an existing note.

    The entry is recorded through the store's ``link`` only; no
    serialized copy of the tree is read, modified or written back.

    Args:
        name: Entry name inside this tree.
        note: Note to link; its ``object_id`` is what gets stored.
    """
    self._db.link(self._object_id, name, note.object_id)
def unlink(self, name: str) -> None:
    """Remove the entry called ``name`` from this tree.

    The removal is delegated to the store's ``unlink``. Its result is not
    inspected, so a name that is not present is silently ignored rather
    than raising.
    """
    self._db.unlink(self._object_id, name)
def mkdir(self, name: str) -> 'TreeNoteImpl':
    """Create a new empty tree, link it under ``name``, and return it.

    Exactly one tree object is written, with empty content; its entries
    are added later through ``link``.
    """
    # Create the new node.
    new_id = self._db.write_tree(b"")
    subtree = TreeNoteImpl(self._parent, new_id)
    # Record the new node as an entry of this tree.
    self.link(name, subtree)
    return subtree
@ -107,8 +88,7 @@ class TreeNoteImpl(NoteImpl, TreeNote):
def child(self, name: str) -> Note:
"""Retrieve a child note by name."""
tree = self._read_tree()
entries = tree.list()
entries = self._db.list_tree(self._object_id)
if name not in entries:
raise KeyError(f"Entry '{name}' not found")
child_id = entries[name]
@ -119,13 +99,12 @@ class TreeNoteImpl(NoteImpl, TreeNote):
def entries(self):
    """Yield each entry of this tree as a ``TreeEntry``.

    This is a generator. Entries come from the store's ``list_tree``
    mapping and are yielded once each, in that mapping's order.
    """
    for name, object_id in self._db.list_tree(self._object_id).items():
        yield TreeEntry(name, object_id)
def list(self) -> dict[str, ObjectId]:
    """Return all entries of this tree as ``{name: object_id}``.

    The mapping comes straight from the store's ``list_tree``; a tree
    with no entries yields whatever empty mapping the store returns.
    """
    return self._db.list_tree(self._object_id)
# ---------------------------------------------------------------------------
@ -145,14 +124,7 @@ class TroveImpl:
@classmethod
def open(cls, path: str | Path, create: bool = False) -> "TroveImpl":
    """Open the store at ``path`` and wrap it in a ``TroveImpl``.

    Args:
        path: Location of the database.
        create: Forwarded to ``Sqlite3Trove.open``.

    No follow-up statement is run on the store's private connection:
    setting up a newly created database, including its root object, is
    left entirely to ``Sqlite3Trove.open``.
    """
    db = Sqlite3Trove.open(path, create=create)
    return cls(db)
@property
def db(self) -> Sqlite3Trove:
@ -170,23 +142,23 @@ class TroveImpl:
# Trove protocol
def get_raw_note(self, note_id: ObjectId) -> Note:
    """Return a TreeNote or BlobNote wrapper for ``note_id``.

    An object is treated as a tree when its type is
    ``"inode/directory"`` or when it has at least one child entry;
    every other existing object is treated as a blob.

    Raises:
        NoteNotFound: If the store has no object with this id.
    """
    info = self._db.get_info(note_id)
    if info is None:
        raise NoteNotFound(note_id)
    # The type is already loaded, so test it first; the child lookup then
    # only costs a query for objects that are not typed as directories.
    if info.type == "inode/directory" or self._db.is_tree(note_id):
        return TreeNoteImpl(self, note_id)
    return BlobNoteImpl(self, note_id)
def create_blob(self, data: bytes | None = None,
                dtype: str = "application/octet-stream") -> BlobNote:
    """Create a new blob object and return a BlobNote for it.

    Args:
        data: Initial content; ``None`` (or empty bytes) stores ``b""``.
        dtype: Type string to record for the new blob.

    Exactly one object is written per call.
    """
    obj_id = self._db.write_blob(data or b"", dtype=dtype)
    return BlobNoteImpl(self, obj_id)
def get_root(self) -> TreeNote:
    """Return a TreeNote wrapper for the root object (id ``NOTE_ROOT_ID``).

    The wrapper is built directly; the store is not queried here.
    """
    root = TreeNoteImpl(self, NOTE_ROOT_ID)
    return root
def open_db_trove(path: str | Path, create: bool = False, **kwargs: tr.OpenArguments) -> Trove:
    """Open a database-backed Trove at ``path``.

    ``create`` is forwarded to ``TroveImpl.open``. Extra keyword arguments
    are accepted but not used by this function.
    """
    trove = TroveImpl.open(path, create=create)
    return trove