From 94d00c94d4ff6b55cca9116e3412083dea9dc7cb Mon Sep 17 00:00:00 2001 From: Andrew Mulbrook Date: Tue, 24 Mar 2026 22:51:56 -0500 Subject: [PATCH] Improve database schema for notes-with-children --- trovedb/db.py | 316 ++++++++++++++++++++++++++++++++++++++------- trovedb/trovedb.py | 82 ++++-------- 2 files changed, 299 insertions(+), 99 deletions(-) diff --git a/trovedb/db.py b/trovedb/db.py index a4d846f..762f4f2 100644 --- a/trovedb/db.py +++ b/trovedb/db.py @@ -10,17 +10,30 @@ import argparse import sqlite3 import sys import uuid +from typing import NamedTuple from datetime import datetime, timezone from pathlib import Path NOTE_ROOT_ID = uuid.UUID(int=0) +class ObjectInfo(NamedTuple): + id: uuid.UUID + type: str + created: datetime + modified: datetime + executable: bool + hidden: bool + + _SCHEMA = """ CREATE TABLE IF NOT EXISTS objects ( - id TEXT PRIMARY KEY, - type TEXT NOT NULL CHECK(type IN ('blob', 'tree')), - data BLOB, - modified TEXT NOT NULL + id TEXT PRIMARY KEY, + type TEXT NOT NULL, + data BLOB, + created REAL NOT NULL, + modified REAL NOT NULL, + executable INTEGER NOT NULL DEFAULT 0, + hidden INTEGER NOT NULL DEFAULT 0 ); CREATE TABLE IF NOT EXISTS metadata ( @@ -34,13 +47,25 @@ CREATE TABLE IF NOT EXISTS labels ( label TEXT PRIMARY KEY, id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE -); +); + +CREATE TABLE IF NOT EXISTS tree_entries ( + parent_id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE, + name TEXT NOT NULL, + child_id TEXT NOT NULL REFERENCES objects(id) ON DELETE CASCADE, + PRIMARY KEY (parent_id, name) +); """ type SqlObjectId = str | uuid.UUID -def _now() -> str: - return datetime.now(timezone.utc).isoformat() +def _now() -> float: + """Current UTC time as a Unix epoch float.""" + return datetime.now(timezone.utc).timestamp() + +def _to_datetime(ts: float) -> datetime: + """Convert a Unix epoch float to a UTC datetime.""" + return datetime.fromtimestamp(ts, tz=timezone.utc) def _sql_id(id: SqlObjectId | None) -> str | None: if id is None: @@ -49,10 +74,6 @@ def _sql_id(id: SqlObjectId | None) -> str | None: id if isinstance(id, str) else str(id) ) -def _initialize_db(con: sqlite3.Connection): - con.executescript(_SCHEMA) - con.commit() - class Sqlite3Trove: def __init__(self, con: sqlite3.Connection): @@ -79,7 +100,7 @@ class Sqlite3Trove: con.commit() obj = cls(con) if initialize: - obj.write_tree(b"", NOTE_ROOT_ID) + obj._write_object(b"", "inode/directory", NOTE_ROOT_ID) return obj def close(self): @@ -91,6 +112,36 @@ class Sqlite3Trove: def __exit__(self, *_): self.close() + # ------------------------------------------------------------------ + # Object info + # ------------------------------------------------------------------ + + def get_info(self, object_id: SqlObjectId) -> ObjectInfo | None: + """Return an ObjectInfo namedtuple for the object, or None if not found.""" + row = self._con.execute( + "SELECT id, type, created, modified, executable, hidden " + "FROM objects WHERE id = ?", + (_sql_id(object_id),), + ).fetchone() + if row is None: + return None + return ObjectInfo( + id=uuid.UUID(row["id"]), + type=row["type"], + created=_to_datetime(row["created"]), + modified=_to_datetime(row["modified"]), + executable=bool(row["executable"]), + hidden=bool(row["hidden"]), + ) + + def is_tree(self, object_id: SqlObjectId) -> bool: + """Return True if the object has any children in tree_entries.""" + row = self._con.execute( + "SELECT 1 FROM tree_entries WHERE parent_id = ? LIMIT 1", + (_sql_id(object_id),), + ).fetchone() + return row is not None + # ------------------------------------------------------------------ # CRUD operations # ------------------------------------------------------------------ @@ -103,9 +154,9 @@ class Sqlite3Trove: return row["type"] if row else None def read_object(self, object_id: SqlObjectId) -> bytes | None: - """Return raw data for a blob object, or None if not found.""" + """Return raw data for an object, or None if not found.""" row = self._con.execute( - "SELECT data, type FROM objects WHERE id = ?", (_sql_id(object_id),) + "SELECT data FROM objects WHERE id = ?", (_sql_id(object_id),) ).fetchone() if row is None: return None @@ -118,7 +169,7 @@ class Sqlite3Trove: ).fetchone() if row is None: return None - return datetime.fromisoformat(row["modified"]) + return _to_datetime(row["modified"]) def read_metadata(self, object_id: SqlObjectId, key: str) -> bytes | None: """Return raw metadata value for (uuid, key), or None if not found.""" @@ -130,45 +181,101 @@ class Sqlite3Trove: return bytes(row["value"]) if row["value"] is not None else b"" def write_metadata(self, object_id: SqlObjectId, key: str, value: bytes) -> None: - """Upsert a metadata row. db.py has no write_metadata, so we go direct.""" + """Upsert a metadata row.""" self._con.execute( "INSERT OR REPLACE INTO metadata (id, key, value) VALUES (?, ?, ?)", (_sql_id(object_id), key, value), ) self._con.commit() - def _write_object(self, data: bytes, dtype: str, object_id: str | uuid.UUID | None = None) -> str: - """ - Insert or replace an object. Returns the id. - If object_id is None, creates a new object with a new UUID. - If object_id is provided, updates or creates the object with that ID. - """ - modified = _now() - if object_id is None: - object_id = uuid.uuid4() + def write_content(self, object_id: SqlObjectId, data: bytes) -> None: + """Update only the data and modified timestamp. Preserves type and flags.""" self._con.execute( - "INSERT OR REPLACE INTO objects (id, type, data, modified) VALUES (?, ?, ?, ?)", - (_sql_id(object_id), dtype, data, modified) + "UPDATE objects SET data = ?, modified = ? WHERE id = ?", + (data, _now(), _sql_id(object_id)), ) self._con.commit() - return _sql_id(object_id) - def write_blob(self, data: bytes, object_id: SqlObjectId | None = None) -> str: + def _write_object( + self, + data: bytes, + dtype: str, + object_id: str | uuid.UUID | None = None, + executable: bool = False, + hidden: bool = False, + ) -> str: + """ + Insert or replace an object. Returns the id. + On INSERT, both created and modified are set to now. + On REPLACE (existing id), created is preserved and modified is updated. + """ + now = _now() + if object_id is None: + object_id = uuid.uuid4() + sid = _sql_id(object_id) + + # Preserve created timestamp on update + row = self._con.execute( + "SELECT created FROM objects WHERE id = ?", (sid,) + ).fetchone() + created = row["created"] if row else now + + self._con.execute( + """INSERT INTO objects (id, type, data, created, modified, executable, hidden) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET + type=excluded.type, data=excluded.data, modified=excluded.modified, + executable=excluded.executable, hidden=excluded.hidden""", + (sid, dtype, data, created, now, int(executable), int(hidden)), + ) + self._con.commit() + return sid + + def write_blob(self, data: bytes, object_id: SqlObjectId | None = None, + dtype: str = "application/octet-stream", + executable: bool = False, hidden: bool = False) -> str: """ Insert or replace a blob. Returns the id. Pass object_id to update an existing object. """ - return self._write_object(data, "blob", _sql_id(object_id)) + return self._write_object(data, dtype, _sql_id(object_id), + executable=executable, hidden=hidden) - def write_tree(self, data: bytes, object_id: SqlObjectId | None = None) -> str: + def write_tree(self, data: bytes, object_id: SqlObjectId | None = None, + hidden: bool = False) -> str: """Write a tree-typed object. Returns the assigned id.""" - return self._write_object(data, "tree", _sql_id(object_id)) + return self._write_object(data, "inode/directory", _sql_id(object_id), + hidden=hidden) + + def set_executable(self, object_id: SqlObjectId, executable: bool) -> None: + """Set or clear the executable flag on an object.""" + self._con.execute( + "UPDATE objects SET executable = ?, modified = ? WHERE id = ?", + (int(executable), _now(), _sql_id(object_id)), + ) + self._con.commit() + + def set_hidden(self, object_id: SqlObjectId, hidden: bool) -> None: + """Set or clear the hidden flag on an object.""" + self._con.execute( + "UPDATE objects SET hidden = ?, modified = ? WHERE id = ?", + (int(hidden), _now(), _sql_id(object_id)), + ) + self._con.commit() + + def set_type(self, object_id: SqlObjectId, dtype: str) -> None: + """Update the MIME type of an object.""" + self._con.execute( + "UPDATE objects SET type = ?, modified = ? WHERE id = ?", + (dtype, _now(), _sql_id(object_id)), + ) + self._con.commit() def delete_object(self, object_id: SqlObjectId) -> bool: """ - Delete a blob and all its metadata rows. + Delete an object and all its metadata rows. Returns True if an object was deleted, False if id not found. - Foreign key cascade handles the metadata rows. + Foreign key cascade handles the metadata and tree_entries rows. """ cur = self._con.execute( "DELETE FROM objects WHERE id = ?", (_sql_id(object_id),) @@ -176,6 +283,62 @@ class Sqlite3Trove: self._con.commit() return cur.rowcount > 0 + # ------------------------------------------------------------------ + # Tree entry operations + # ------------------------------------------------------------------ + + def link(self, parent_id: SqlObjectId, name: str, child_id: SqlObjectId) -> None: + """ + Link a child object into a tree under the given name. + Replaces any existing entry with the same name in this tree. + Both parent_id and child_id must exist in the objects table + (enforced by FK constraints). + """ + self._con.execute( + "INSERT OR REPLACE INTO tree_entries (parent_id, name, child_id) " + "VALUES (?, ?, ?)", + (_sql_id(parent_id), name, _sql_id(child_id)), + ) + self._con.execute( + "UPDATE objects SET modified = ? WHERE id = ?", + (_now(), _sql_id(parent_id)), + ) + self._con.commit() + + def unlink(self, parent_id: SqlObjectId, name: str) -> bool: + """ + Remove a named entry from a tree. + Returns True if an entry was removed, False if not found. + Does not delete the child object itself. + """ + cur = self._con.execute( + "DELETE FROM tree_entries WHERE parent_id = ? AND name = ?", + (_sql_id(parent_id), name), + ) + if cur.rowcount > 0: + self._con.execute( + "UPDATE objects SET modified = ? WHERE id = ?", + (_now(), _sql_id(parent_id)), + ) + self._con.commit() + return cur.rowcount > 0 + + def list_tree(self, parent_id: SqlObjectId) -> dict[str, str]: + """ + Return all entries in a tree as {name: child_id}. + Returns an empty dict if the tree has no entries. + """ + rows = self._con.execute( + "SELECT name, child_id FROM tree_entries " + "WHERE parent_id = ? ORDER BY name", + (_sql_id(parent_id),), + ).fetchall() + return {row["name"]: row["child_id"] for row in rows} + + # ------------------------------------------------------------------ + # Label operations + # ------------------------------------------------------------------ + def get_label(self, label: str) -> SqlObjectId | None: """ Return the ID associated with a label, or None if not found. @@ -228,18 +391,25 @@ def main(): # Create database subparsers.add_parser("create", help="Create a new database") - # Get blob - get_parser = subparsers.add_parser("get", help="Get a blob object by ID") - get_parser.add_argument("id", help="ID of the blob to retrieve") + # Get object data + get_parser = subparsers.add_parser("get", help="Get object data by ID") + get_parser.add_argument("id", help="ID of the object to retrieve") + + # Info + info_parser = subparsers.add_parser("info", help="Show object metadata") + info_parser.add_argument("id", help="Object ID") # Write blob write_parser = subparsers.add_parser("write", help="Write data to a blob") write_parser.add_argument("data", help="Data to write (as string, will be encoded as UTF-8)") write_parser.add_argument("--id", help="ID of existing blob to update (optional)") + write_parser.add_argument("--type", default="text/plain", help="MIME type (default: text/plain)") + write_parser.add_argument("--executable", action="store_true", help="Mark as executable") + write_parser.add_argument("--hidden", action="store_true", help="Mark as hidden") - # Delete blob - delete_parser = subparsers.add_parser("delete", help="Delete a blob by ID") - delete_parser.add_argument("id", help="ID of the blob to delete") + # Delete object + delete_parser = subparsers.add_parser("delete", help="Delete an object by ID") + delete_parser.add_argument("id", help="ID of the object to delete") # Set label setlabel_parser = subparsers.add_parser("setlabel", help="Create or update a label to point to an ID") @@ -253,6 +423,19 @@ def main(): # List labels subparsers.add_parser("labels", help="List all labels") + # Tree operations + link_parser = subparsers.add_parser("link", help="Link a child into a tree") + link_parser.add_argument("parent_id", help="Parent tree object ID") + link_parser.add_argument("name", help="Entry name") + link_parser.add_argument("child_id", help="Child object ID") + + unlink_parser = subparsers.add_parser("unlink", help="Unlink a child from a tree") + unlink_parser.add_argument("parent_id", help="Parent tree object ID") + unlink_parser.add_argument("name", help="Entry name to remove") + + ls_parser = subparsers.add_parser("ls", help="List tree entries") + ls_parser.add_argument("parent_id", help="Tree object ID to list") + args = parser.parse_args() try: @@ -270,14 +453,33 @@ def main(): data = db.read_object(args.id) db.close() if data is None: - print(f"Blob not found: {args.id}") + print(f"Object not found: {args.id}") sys.exit(1) sys.stdout.buffer.write(data) + case "info": + db = Sqlite3Trove.open(args.database) + obj = db.get_info(args.id) + has_children = db.is_tree(args.id) + db.close() + if obj is None: + print(f"Object not found: {args.id}") + sys.exit(1) + print(f"id: {obj.id}") + print(f"type: {obj.type}") + print(f"created: {obj.created.isoformat()}") + print(f"modified: {obj.modified.isoformat()}") + print(f"executable: {obj.executable}") + print(f"hidden: {obj.hidden}") + print(f"children: {has_children}") + case "write": data_bytes = args.data.encode("utf-8") db = Sqlite3Trove.open(args.database) - object_id = db.write_blob(data_bytes, args.id) + object_id = db.write_blob(data_bytes, args.id, + dtype=args.type, + executable=args.executable, + hidden=args.hidden) db.close() print(object_id) @@ -286,9 +488,9 @@ def main(): deleted = db.delete_object(args.id) db.close() if deleted: - print(f"Deleted blob: {args.id}") + print(f"Deleted: {args.id}") else: - print(f"Blob not found: {args.id}") + print(f"Object not found: {args.id}") sys.exit(1) case "setlabel": @@ -317,6 +519,32 @@ def main(): else: print("No labels found.") + case "link": + db = Sqlite3Trove.open(args.database) + db.link(args.parent_id, args.name, args.child_id) + db.close() + print(f"Linked '{args.name}' -> {args.child_id} in {args.parent_id}") + + case "unlink": + db = Sqlite3Trove.open(args.database) + removed = db.unlink(args.parent_id, args.name) + db.close() + if removed: + print(f"Unlinked '{args.name}' from {args.parent_id}") + else: + print(f"Entry '{args.name}' not found in {args.parent_id}") + sys.exit(1) + + case "ls": + db = Sqlite3Trove.open(args.database) + entries = db.list_tree(args.parent_id) + db.close() + if entries: + for name, child_id in entries.items(): + print(f"{name}: {child_id}") + else: + print("No entries.") + except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) diff --git a/trovedb/trovedb.py b/trovedb/trovedb.py index 5e0e877..435ac64 100644 --- a/trovedb/trovedb.py +++ b/trovedb/trovedb.py @@ -2,16 +2,14 @@ trovedb.py — Concrete implementation of Trove protocols backed by Sqlite3Trove. Implements BlobNote, TreeNote, and Trove protocols defined in trove.py. -Depends on db.py (Sqlite3Trove) and tree.py (Tree) for storage and -tree serialization respectively. +Depends on db.py (Sqlite3Trove) for storage. """ -from typing import Optional, Self +from typing import Optional from pathlib import Path import datetime as dt from .db import Sqlite3Trove, NOTE_ROOT_ID -from .tree import Tree as TreeData from . import trove as tr @@ -19,7 +17,7 @@ from .trove import Note, Trove, TreeNote, BlobNote, TreeEntry, NoteNotFound, Obj class NoteImpl(Note): - """Concrete not implementation""" + """Concrete note implementation.""" def __init__(self, parent: 'TroveImpl', object_id: ObjectId): self._parent = parent @@ -37,13 +35,14 @@ class NoteImpl(Note): @property def mtime(self) -> dt.datetime: - """Return modification time as Unix timestamp, or None if not set.""" + """Return modification time as UTC datetime.""" return self._db.get_mtime(self._object_id) @property def mime(self) -> str: - """Return MIME type, defaulting to generic binary stream.""" - return "application/octet-stream" + """Return MIME type from the objects table.""" + info = self._db.get_info(self._object_id) + return info.type if info else "application/octet-stream" def get_raw_metadata(self, key: str) -> Optional[bytes]: return self._db.read_metadata(self._object_id, key) @@ -61,43 +60,25 @@ class BlobNoteImpl(NoteImpl, BlobNote): return data if data is not None else b"" def write(self, data: bytes) -> None: - self._db.write_blob(data, self._object_id) + self._db.write_content(self._object_id, data) class TreeNoteImpl(NoteImpl, TreeNote): - """Concrete TreeNote: a tree object in the store with metadata access.""" - - def _read_tree(self) -> TreeData: - data = self._db.read_object(self._object_id) - return TreeData(data if data else None) - - def _flush_tree(self, tree: TreeData) -> None: - self._db.write_tree(tree.serialize(), self._object_id) + """Concrete TreeNote: a tree object backed by the tree_entries table.""" # Tree protocol def link(self, name: str, note: Note) -> None: - """Link name to an existing note (blob or tree).""" - tree = self._read_tree() - tree.set_entry(name, note.object_id) - self._flush_tree(tree) + """Link name to an existing note.""" + self._db.link(self._object_id, name, note.object_id) def unlink(self, name: str) -> None: - """Remove an entry by name. Raises KeyError if not found.""" - try: - tree = self._read_tree() - tree.rm_entry(name) - self._flush_tree(tree) - except KeyError: - pass + """Remove an entry by name.""" + self._db.unlink(self._object_id, name) def mkdir(self, name: str) -> 'TreeNoteImpl': """Create a new empty tree, link it under name, and return it.""" - - # Create the new node - new_id = self._db.write_tree(TreeData().serialize()) + new_id = self._db.write_tree(b"") tree = TreeNoteImpl(self._parent, new_id) - - # Update our node self.link(name, tree) return tree @@ -107,8 +88,7 @@ class TreeNoteImpl(NoteImpl, TreeNote): def child(self, name: str) -> Note: """Retrieve a child note by name.""" - tree = self._read_tree() - entries = tree.list() + entries = self._db.list_tree(self._object_id) if name not in entries: raise KeyError(f"Entry '{name}' not found") child_id = entries[name] @@ -119,13 +99,12 @@ class TreeNoteImpl(NoteImpl, TreeNote): def entries(self): """Return all entries as an iterable of TreeEntry.""" - tree = self._read_tree() - for name, object_id in tree.list().items(): + for name, object_id in self._db.list_tree(self._object_id).items(): yield TreeEntry(name, object_id) def list(self) -> dict[str, ObjectId]: """Return all entries as {name: object_id}.""" - return self._read_tree().list() + return self._db.list_tree(self._object_id) # --------------------------------------------------------------------------- @@ -145,14 +124,7 @@ class TroveImpl: @classmethod def open(cls, path: str | Path, create: bool = False) -> "TroveImpl": db = Sqlite3Trove.open(path, create=create) - trove = cls(db) - if create: - # Root was written as a blob by Sqlite3Trove.open(); fix its type. - db._con.execute( - "UPDATE objects SET type = 'tree' WHERE id = ?", (NOTE_ROOT_ID,) - ) - db._con.commit() - return trove + return cls(db) @property def db(self) -> Sqlite3Trove: @@ -170,23 +142,23 @@ class TroveImpl: # Trove protocol def get_raw_note(self, note_id: ObjectId) -> Note: """Return a BlobNote or TreeNote for the given id, or None if not found.""" - ot = self._db.get_object_type(note_id) - if ot is None: + info = self._db.get_info(note_id) + if info is None: raise NoteNotFound(note_id) - if ot == "blob": - return BlobNoteImpl(self, note_id) - if ot == "tree": + if self._db.is_tree(note_id) or info.type == "inode/directory": return TreeNoteImpl(self, note_id) - raise ValueError(f"Unknown object type '{ot}' for id {note_id}") + return BlobNoteImpl(self, note_id) - def create_blob(self, data: bytes | None = None) -> BlobNote: + def create_blob(self, data: bytes | None = None, + dtype: str = "application/octet-stream") -> BlobNote: """Create a new blob object and return a BlobNote for it.""" - obj_id = self._db.write_blob(data or b"") + obj_id = self._db.write_blob(data or b"", dtype=dtype) return BlobNoteImpl(self, obj_id) def get_root(self) -> TreeNote: - """Return the root TreeNote (always id=NODE_ROOT_ID).""" + """Return the root TreeNote (always id=NOTE_ROOT_ID).""" return TreeNoteImpl(self, NOTE_ROOT_ID) + def open_db_trove(path: str | Path, create: bool = False, **kwargs: tr.OpenArguments) -> Trove: return TroveImpl.open(path, create=create)