Move away from inodes as direct db reference

This commit is contained in:
Andrew Mulbrook 2026-03-21 12:08:54 -05:00
parent f80f4d12a2
commit e16d67e2f8
4 changed files with 375 additions and 154 deletions

View file

@ -13,34 +13,184 @@ import errno
import os
import stat
import time
import logging
from typing import Sequence, Tuple, cast
import pyfuse3
import trio
from pyfuse3 import InodeT, FileHandleT
from trovedb.trove import Trove, Note, Tree as TroveTree, TreeNote, Blob as TroveBlob
from trovedb.trove import Trove, Note, Tree as TroveTree, TreeNote, Blob as TroveBlob, ObjectId, TreeExists
logger = logging.getLogger(__name__)
class _TroveEntry:
__slots__ = [ 'object_id', 'sys_inode', 'ref_count' ]
def __init__(self, sys_inode: InodeT, object_id: ObjectId | None):
self.object_id: ObjectId | None = object_id
self.sys_inode: InodeT = sys_inode
self.ref_count = 0
def ref(self) -> None:
self.ref_count += 1
def deref(self, count: int = 1) -> bool:
assert self.ref_count > 0
self.ref_count -= count
return self.ref_count <= 0
class _TroveHandle:
__slots__ = [ 'inode_id', 'ref_count', 'note', 'handle_id' ]
def __init__(self, inode_id: InodeT, handle_id: FileHandleT, note: Note):
self.inode_id = inode_id
self.handle_id = handle_id
self.note = note
class _TroveHandleTree(_TroveHandle):
    """Handle variant used for directories; exposes the note as a TreeNote."""

    # Without an explicit (empty) __slots__ the subclass would regain a
    # per-instance __dict__, undoing the slots declared on the base class.
    __slots__ = ()

    @property
    def tree(self) -> TreeNote:
        """The handle's note, typed as a TreeNote (no runtime check is made)."""
        return cast(TreeNote, self.note)
class TroveFuseOps(pyfuse3.Operations):
enable_writeback_cache = False
def __init__(self, trove: Trove):
    """Bind the operations object to ``trove`` and seed its lookup tables."""
    super().__init__()
    self._trove = trove

    # Inode tables: inode -> entry, and object id -> inode.
    # Freshly allocated inode numbers start at 2.
    self._next_inode = 2
    self._inode_cache: dict[InodeT, _TroveEntry] = {}
    self._inode_reverse_cache: dict[ObjectId, InodeT] = {}

    # Register the root note under pyfuse3.ROOT_INODE and take two
    # references on it up front.
    root_note = trove.get_root()
    root_entry = _TroveEntry(pyfuse3.ROOT_INODE, root_note.object_id)
    self._inode_cache[pyfuse3.ROOT_INODE] = root_entry
    self._inode_reverse_cache[root_note.object_id] = pyfuse3.ROOT_INODE
    root_entry.ref()
    root_entry.ref()

    # Open-handle table; handle ids also start at 2.
    self._next_handle = 2
    self._handles: dict[int, _TroveHandle] = {}
# ------------------------------------------------------------------
# Helpers
# Entry Management [entries relate inode to Note]
# ------------------------------------------------------------------
def _note_or_error(self, inode: int):
note = self._trove.get_raw_note(inode)
def _get_ent_from_inode(self, inode: InodeT) -> _TroveEntry:
"""Get entry from predefined inode"""
if inode not in self._inode_cache:
logger.debug("inode not found in cache: %d", inode)
raise pyfuse3.FUSEError(errno.ENOENT)
value = self._inode_cache[inode]
return value
def _create_get_ent_from_note(self, note: Note) -> _TroveEntry:
"""Create entry from note. Inode is reserved but not saved in cache."""
if note.object_id in self._inode_reverse_cache:
sys_inode = self._inode_reverse_cache[note.object_id]
return self._inode_cache[sys_inode]
sys_inode = InodeT(self._next_inode)
self._next_inode += 1
return _TroveEntry(sys_inode=sys_inode, object_id=note.object_id)
def _ref_entry(self, ent: _TroveEntry) -> None:
"""Ref entry. If it is not in cache, it is added to cache."""
if ent.sys_inode not in self._inode_cache:
self._inode_cache[ent.sys_inode] = ent
self._inode_reverse_cache[ent.object_id] = ent.sys_inode
ent.ref()
def _deref_entry(self, ent: _TroveEntry, count: int = 1) -> None:
"""Deref entry. Remove from cache if count hits 0"""
if ent.deref(count):
if ent.sys_inode in self._inode_cache:
logger.debug("free inode: %d", ent.sys_inode)
del self._inode_cache[ent.sys_inode]
del self._inode_reverse_cache[ent.object_id]
def _get_inode_note(self, inode: InodeT) -> Note:
"""Get note from Inode, inode must be reserved"""
ent = self._get_ent_from_inode(inode)
return self._get_ent_note(ent)
def _get_ent_note(self, ent: _TroveEntry) -> Note:
"""Get note from entry."""
note = self._trove.get_raw_note(ent.object_id)
if note is None:
logger.debug("note lookup failed: %s", ent.object_id)
raise pyfuse3.FUSEError(errno.ENOENT)
return note
def _make_attr(self, inode: int, is_tree: bool, size: int = 0) -> pyfuse3.EntryAttributes:
def _lookup_update_object(self, object_id: ObjectId) -> _TroveEntry:
if object_id in self._inode_reverse_cache:
inode = self._inode_reverse_cache[object_id]
return self._lookup_existing(inode)
else:
inode_id = InodeT(self._next_inode)
self._next_inode += 1
inode = _TroveEntry(sys_inode=inode_id, object_id=object_id)
self._inode_cache[inode_id] = inode
self._inode_reverse_cache[object_id] = inode_id
return inode
def _lookup_child(self, parent_inode: InodeT, name: bytes) -> Tuple[_TroveEntry, Note]:
    """Resolve ``name`` inside the directory at ``parent_inode``.

    Returns:
        ``(entry, note)`` for the child. No reference is taken on the entry.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when the parent note is not a TreeNote,
            ENOENT when the parent reports no child with that name.
    """
    parent_note = self._get_inode_note(parent_inode)
    if not isinstance(parent_note, TreeNote):
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    try:
        child_note = parent_note.child(name.decode())
    except KeyError:
        logger.debug("lookup failed: %d -> %s", parent_inode, name.decode())
        raise pyfuse3.FUSEError(errno.ENOENT) from None

    return self._create_get_ent_from_note(child_note), child_note
def _get_sys_inode_id(self, object_id: ObjectId) -> InodeT:
if object_id in self._inode_reverse_cache:
return self._inode_reverse_cache[object_id]
else:
raise pyfuse3.FUSEError(errno.ENOENT)
# ------------------------------------------------------------------
# Handle Management
# ------------------------------------------------------------------
def _open_handle(self, inode: InodeT) -> _TroveHandle:
    """Allocate and register a handle for the note behind ``inode``.

    TreeNote instances get a ``_TroveHandleTree``; anything else gets a plain
    ``_TroveHandle``. Lookup errors from ``_get_inode_note`` pass through.
    """
    note = self._get_inode_note(inode)

    handle_id = FileHandleT(self._next_handle)
    self._next_handle += 1

    handle_cls = _TroveHandleTree if isinstance(note, TreeNote) else _TroveHandle
    handle: _TroveHandle = handle_cls(inode_id=inode, handle_id=handle_id, note=note)

    self._handles[handle_id] = handle
    return handle
def _get_handle(self, handle_id: FileHandleT) -> _TroveHandle:
if not handle_id in self._handles:
raise pyfuse3.FUSEError(errno.EBADF)
return self._handles[handle_id]
def _close_handle(self, handle: _TroveHandle):
del self._handles[handle.handle_id]
def _get_attr(self, ent: _TroveEntry, note: Note) -> pyfuse3.EntryAttributes:
# Determine basic information
is_tree = True
size = 0
if isinstance(note, TroveBlob):
size = len(note.read())
is_tree = False
# Create and fill attr structure
attr = pyfuse3.EntryAttributes()
attr.st_ino = pyfuse3.InodeT(inode)
attr.st_ino = ent.sys_inode
attr.st_nlink = 1
attr.st_uid = os.getuid()
attr.st_gid = os.getgid()
@ -63,116 +213,128 @@ class TroveFuseOps(pyfuse3.Operations):
attr.st_blocks = (size + 511) // 512
return attr
# Build EntryAttributes for a note by delegating to self._make_attr.
# NOTE(review): this passes note.object_id in the position _make_attr names
# `inode`, i.e. the object id doubles as the inode number. That looks like the
# pre-refactor scheme; _get_attr(ent, note) derives the same size/is_tree values
# but reports ent.sys_inode instead. Confirm whether this helper is still needed.
def _attr_for_note(self, note: Note) -> pyfuse3.EntryAttributes:
# Defaults describe a tree: size 0, is_tree True.
size = 0
is_tree = True
# A blob reports the length of its full contents and is not a tree.
if isinstance(note, TroveBlob):
size = len(note.read())
is_tree = False
return self._make_attr(note.object_id, is_tree, size)
# ------------------------------------------------------------------
# Stat / lookup
# ------------------------------------------------------------------
async def getattr(self, inode: InodeT, ctx=None) -> pyfuse3.EntryAttributes:
    """Return the attributes of the note behind ``inode``.

    Only the entry-based definition is kept. The one it shadowed resolved the
    inode through ``_note_or_error`` / ``_attr_for_note``, which treat the
    inode number as a Trove object id.

    Raises:
        pyfuse3.FUSEError: ENOENT when the inode is not cached or its note
            cannot be fetched.
    """
    logger.debug("getattr inode:%d", inode)
    ent = self._get_ent_from_inode(inode)
    note = self._get_ent_note(ent)
    return self._get_attr(ent, note)
async def lookup(self, parent_inode: InodeT, name: bytes, ctx=None) -> pyfuse3.EntryAttributes:
    """Look up ``name`` in the directory ``parent_inode`` and return its attributes.

    On success one reference is taken on the child's entry, which also
    registers it in the inode caches if it was not there yet. The shadowed
    object-id based definition that preceded this one is dropped.

    Raises:
        pyfuse3.FUSEError: ENOTDIR / ENOENT as raised by ``_lookup_child``.
    """
    logger.debug("lookup inode:%d name:%s", parent_inode, name)
    ent, child = self._lookup_child(parent_inode, name)
    self._ref_entry(ent)
    return self._get_attr(ent, child)
async def setattr(self, inode: InodeT, attr, fields, fh: FileHandleT | None, ctx) -> pyfuse3.EntryAttributes:
    """Apply a size change to the note behind ``inode`` and return its attributes.

    Only ``fields.update_size`` is acted on. A blob is truncated, or padded
    with NUL bytes, to ``attr.st_size``. The shadowed definition that resolved
    the inode through ``_note_or_error`` is dropped.

    Raises:
        pyfuse3.FUSEError: EINVAL when a size change targets a non-blob note;
            ENOENT when the inode or its note cannot be resolved.
    """
    ent = self._get_ent_from_inode(inode)
    note = self._get_ent_note(ent)

    if fields.update_size:
        if not isinstance(note, TroveBlob):
            raise pyfuse3.FUSEError(errno.EINVAL)
        current = note.read()
        new_size = attr.st_size
        if new_size < len(current):
            note.write(current[:new_size])
        elif new_size > len(current):
            note.write(current + b"\x00" * (new_size - len(current)))
        # Equal sizes: nothing to write.

    return self._get_attr(ent, note)
def forget(self, inode_list: Sequence[Tuple[InodeT, int]]) -> None:
    """Release the references listed in ``inode_list``.

    Each item is ``(inode, nlookup)``; ``nlookup`` references are dropped from
    the inode's entry, which is evicted from the caches once none remain.
    An inode that is not cached is logged and skipped. Only the synchronous
    definition is kept; the no-op coroutine that preceded it is dropped.
    """
    for inode, nlookup in inode_list:
        logger.debug("deref inode:%d count:%d", inode, nlookup)
        try:
            ent = self._get_ent_from_inode(inode)
        except pyfuse3.FUSEError as e:
            logger.warning("Failed to deref inode %d: %s", inode, str(e))
            continue
        try:
            self._deref_entry(ent, nlookup)
        except pyfuse3.FUSEError as e:
            logger.warning("Failed to deref inode %d: %s", inode, str(e))
# ------------------------------------------------------------------
# Directory ops
# ------------------------------------------------------------------
async def opendir(self, inode: InodeT, ctx) -> FileHandleT:
    """Open the directory at ``inode`` and return the id of its new handle.

    The value returned is the id registered in the handle table, which is
    what ``readdir`` / ``releasedir`` resolve through ``_get_handle``. The
    stale ``return pyfuse3.FileHandleT(inode)`` returned the inode number
    instead and is dropped.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when the inode is not a tree (the handle
            opened for the check is closed again first); ENOENT when the
            inode cannot be resolved.
    """
    handle = self._open_handle(inode)
    if not isinstance(handle, _TroveHandleTree):
        logger.debug("attempted opendir on %d not a tree", inode)
        self._close_handle(handle)
        raise pyfuse3.FUSEError(errno.ENOTDIR)
    logger.debug("opened dir inode %d -> handle %d", inode, handle.handle_id)
    return handle.handle_id
async def readdir(self, fh: FileHandleT, start_id: int, token) -> None:
    """Report the entries of the open directory ``fh`` starting at ``start_id``.

    Each entry is reported with ``idx + 1`` as its continuation id. Children
    whose note cannot be fetched are skipped.

    Raises:
        pyfuse3.FUSEError: EBADF for an unknown handle, ENOTDIR when the
            handle's note is not a tree.
    """
    logger.debug("readdir %d start_id %d", fh, start_id)
    handle = self._get_handle(fh)
    note = handle.note
    if not isinstance(note, TroveTree):
        logger.debug("attempted readdir on %d not a tree", fh)
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    entries = list(note.list().items())  # [(name, object_id), ...]
    for idx, (name, child_id) in enumerate(entries):
        if idx < start_id:
            continue
        child = self._trove.get_raw_note(child_id)
        if child is None:
            continue
        child_ent = self._create_get_ent_from_note(child)
        attr = self._get_attr(child_ent, child)
        if not pyfuse3.readdir_reply(token, name.encode(), attr, idx + 1):
            # Entry rejected (reply buffer full): the kernel never saw it, so
            # it must not gain a reference that no forget() would release.
            break
        # Only an accepted entry counts as a lookup.
        self._ref_entry(child_ent)
async def releasedir(self, fh: FileHandleT) -> None:
    """Close the directory handle ``fh``.

    The shadowed no-op definition is dropped so handles opened by ``opendir``
    are actually removed from the handle table.

    Raises:
        pyfuse3.FUSEError: EBADF when ``fh`` is not a registered handle.
    """
    logger.debug("releasedir %d", fh)
    handle = self._get_handle(fh)
    self._close_handle(handle)
async def mkdir(self, parent_inode: InodeT, name: bytes, mode: int, ctx) -> pyfuse3.EntryAttributes:
    """Create directory ``name`` under ``parent_inode`` and return its attributes.

    The new tree gets an entry with one reference, so the inode number
    reported to the kernel is one the inode cache can resolve later. The
    stale path that returned ``_make_attr(new_tree.object_id, ...)`` skipped
    that registration and is dropped. ``mode`` is not used.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when the parent is not a TreeNote, EEXIST
            when the parent reports ``TreeExists``.
    """
    logger.debug("mkdir inode:%d name:%s", parent_inode, name)

    # Grab parent note, verify is tree
    parent = self._get_inode_note(parent_inode)
    if not isinstance(parent, TreeNote):
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    # Create new directory in note
    try:
        new_tree: TreeNote = parent.mkdir(name.decode())
    except TreeExists:
        raise pyfuse3.FUSEError(errno.EEXIST) from None

    # Grab entity for kernel
    ent = self._create_get_ent_from_note(new_tree)
    self._ref_entry(ent)
    return self._get_attr(ent, new_tree)
async def rmdir(self, parent_inode: InodeT, name: bytes, ctx) -> None:
    """Remove the empty directory ``name`` from ``parent_inode``.

    The name is unlinked exactly once. Unlinking it a second time, as the
    interleaved lines did, always ended in the KeyError -> ENOENT path.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when the parent or the child is not a
            tree, ENOENT when the child is missing, ENOTEMPTY when the child
            still has entries.
    """
    logger.debug("rmdir inode:%d name:%s", parent_inode, name)
    parent = self._get_inode_note(parent_inode)
    if not isinstance(parent, TreeNote):
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    name_str = name.decode()
    # NOTE(review): assumes a TreeNote exposes list() mapping name -> object id,
    # as the tree note used by readdir does - confirm.
    entries = parent.list()
    if name_str not in entries:
        raise pyfuse3.FUSEError(errno.ENOENT)

    child = self._trove.get_raw_note(entries[name_str])
    if child is None:
        raise pyfuse3.FUSEError(errno.ENOENT)
    if not isinstance(child, TroveTree):
        raise pyfuse3.FUSEError(errno.ENOTDIR)
    if child.list():
        raise pyfuse3.FUSEError(errno.ENOTEMPTY)

    try:
        parent.unlink(name_str)
    except KeyError:
        raise pyfuse3.FUSEError(errno.ENOENT) from None
# ------------------------------------------------------------------
# File ops
# ------------------------------------------------------------------
async def open(self, inode: InodeT, flags, ctx) -> pyfuse3.FileInfo:
    """Open the file at ``inode`` and return a FileInfo carrying its handle id.

    The handle id is the one registered in the handle table, which ``read``,
    ``write`` and ``release`` resolve through ``_get_handle``. The shadowed
    definition returned the inode number as the handle and is dropped.
    ``flags`` is not inspected.

    Raises:
        pyfuse3.FUSEError: EISDIR when the inode is a tree (the handle opened
            for the check is closed again first); ENOENT when the inode
            cannot be resolved.
    """
    handle = self._open_handle(inode)
    if isinstance(handle.note, TroveTree):
        self._close_handle(handle)
        raise pyfuse3.FUSEError(errno.EISDIR)
    return pyfuse3.FileInfo(fh=handle.handle_id)
async def create(self, parent_inode: int, name: bytes, mode: int, flags, ctx) -> tuple:
parent = self._note_or_error(parent_inode)
async def create(self, parent_inode: InodeT, name: bytes, mode: int, flags, ctx) -> tuple:
logger.debug("create inode:%d name:%s", parent_inode, name)
parent = self._get_inode_note(parent_inode)
if not isinstance(parent, TroveTree):
raise pyfuse3.FUSEError(errno.ENOTDIR)
name_str = name.decode()
@ -180,67 +342,66 @@ class TroveFuseOps(pyfuse3.Operations):
raise pyfuse3.FUSEError(errno.EEXIST)
blob = self._trove.create_blob(b"")
parent.link(name_str, blob)
attr = self._make_attr(blob.object_id, False, 0)
return pyfuse3.FileInfo(fh=pyfuse3.FileHandleT(blob.object_id)), attr
async def read(self, fh: int, offset: int, length: int) -> bytes:
note = self._note_or_error(fh)
return note.read()[offset:offset + length]
ent = self._create_get_ent_from_note(blob)
self._ref_entry(ent)
async def write(self, fh: int, offset: int, data: bytes) -> int:
note = self._note_or_error(fh)
existing = note.read()
if offset > len(existing):
existing = existing + b"\x00" * (offset - len(existing))
note.write(existing[:offset] + data + existing[offset + len(data):])
handle = self._open_handle(ent.sys_inode)
attr = self._get_attr(ent, blob)
return pyfuse3.FileInfo(fh=handle.handle_id), attr
async def read(self, fh: FileHandleT, offset: int, length: int) -> bytes:
    """Return up to ``length`` bytes of the blob behind ``fh`` starting at ``offset``.

    Raises:
        pyfuse3.FUSEError: EBADF when ``fh`` is unknown or its note is not a blob.
    """
    logger.debug("read fh:%d offset:%d length:%d", fh, offset, length)
    blob = self._get_handle(fh).note
    if not isinstance(blob, TroveBlob):
        raise pyfuse3.FUSEError(errno.EBADF)
    end = offset + length
    return blob.read()[offset:end]
async def write(self, fh: FileHandleT, offset: int, data: bytes) -> int:
    """Write ``data`` into the blob behind ``fh`` at ``offset``; return bytes written.

    The blob is read in full, padded with NUL bytes when ``offset`` lies past
    its end, spliced, and written back whole.

    Raises:
        pyfuse3.FUSEError: EBADF when ``fh`` is unknown or its note is not a
            blob. Previously a non-blob handle reported ``len(data)`` bytes
            written although nothing was stored; this now matches ``read``.
    """
    handle = self._get_handle(fh)
    note = handle.note
    if not isinstance(note, TroveBlob):
        raise pyfuse3.FUSEError(errno.EBADF)

    existing = note.read()
    if offset > len(existing):
        existing = existing + b"\x00" * (offset - len(existing))
    note.write(existing[:offset] + data + existing[offset + len(data):])
    return len(data)
async def release(self, fh: FileHandleT) -> None:
    """Close the file handle ``fh``.

    The shadowed no-op definition is dropped so handles created by ``open``
    and ``create`` are actually removed from the handle table.

    Raises:
        pyfuse3.FUSEError: EBADF when ``fh`` is not a registered handle.
    """
    handle = self._get_handle(fh)
    self._close_handle(handle)
async def unlink(self, parent_inode: InodeT, name: bytes, ctx) -> None:
    """Remove the non-directory entry ``name`` from ``parent_inode``.

    The entry is unlinked exactly once and every lookup goes through
    ``parent_note``. The interleaved lines referenced an undefined ``parent``
    and unlinked the same name twice.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when the parent is not a tree, ENOENT when
            the entry or its note is missing, EISDIR when the entry is a tree.
    """
    parent_note = self._get_inode_note(parent_inode)
    if not isinstance(parent_note, TroveTree):
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    name_str = name.decode()
    entries = parent_note.list()
    if name_str not in entries:
        raise pyfuse3.FUSEError(errno.ENOENT)

    child = self._trove.get_raw_note(entries[name_str])
    if child is None:
        raise pyfuse3.FUSEError(errno.ENOENT)
    if isinstance(child, TroveTree):
        raise pyfuse3.FUSEError(errno.EISDIR)

    parent_note.unlink(name_str)
async def rename(self, parent_inode_old: InodeT, name_old: bytes, parent_inode_new: InodeT, name_new: bytes, flags, ctx):
    """Move ``name_old`` in ``parent_inode_old`` to ``name_new`` in ``parent_inode_new``.

    The moved note keeps its object id, so an inode already cached for it
    stays valid. An existing target is replaced, but only if it actually
    exists. ``flags`` is not inspected.

    Raises:
        pyfuse3.FUSEError: ENOTDIR when either parent is not a tree, ENOENT
            when the source is missing, ENOTEMPTY when the target is a
            non-empty tree.
    """
    # Decode / validate names
    name_new_str = name_new.decode()
    name_old_str = name_old.decode()

    # Grab the parents
    new_parent = self._get_inode_note(parent_inode_new)
    if not isinstance(new_parent, TroveTree):
        raise pyfuse3.FUSEError(errno.ENOTDIR)
    old_parent = self._get_inode_note(parent_inode_old)
    if not isinstance(old_parent, TroveTree):
        raise pyfuse3.FUSEError(errno.ENOTDIR)

    # Resolve the source through the entry layer (raises ENOENT if missing).
    _ent, note = self._lookup_child(parent_inode_old, name_old)

    # Renaming an entry onto itself is a no-op. Without this guard the
    # unlink/link/unlink sequence below would delete the entry.
    if parent_inode_old == parent_inode_new and name_old_str == name_new_str:
        return

    # Remove an existing target, if there is one. unlink() on a missing name
    # raises KeyError (see rmdir), so it must not be called unconditionally.
    new_entries = new_parent.list()
    if name_new_str in new_entries:
        target = self._trove.get_raw_note(new_entries[name_new_str])
        if target is not None and isinstance(target, TroveTree) and target.list():
            raise pyfuse3.FUSEError(errno.ENOTEMPTY)
        new_parent.unlink(name_new_str)

    # Link to new parent, unlink from old
    new_parent.link(name_new_str, note)
    old_parent.unlink(name_old_str)
@ -249,6 +410,8 @@ class TroveFuseOps(pyfuse3.Operations):
# ------------------------------------------------------------------
async def _run(ops: TroveFuseOps, mountpoint: str) -> None:
logging.basicConfig(level=logging.DEBUG)
options = set(pyfuse3.default_options)
options.add("fsname=trove")
pyfuse3.init(ops, mountpoint, options)