diff --git a/trovedb/cli/cat.py b/trovedb/cli/cat.py new file mode 100644 index 0000000..fb67425 --- /dev/null +++ b/trovedb/cli/cat.py @@ -0,0 +1,22 @@ +"""Write note contents to standard output""" +import argparse +import sys + +from trovedb.cli import CliEnv + +def setup(parser: argparse.ArgumentParser) -> None: + """Configure this subcommand's arguments.""" + parser.add_argument('-u', '--unbuffered', action='store_true', + help="Output raw bytes, without any decoding.") + parser.add_argument('notes', nargs='+', metavar='FILENAME', + help="One or more notes to process.") + + +def run(env: CliEnv, args: argparse.Namespace) -> None: + """Entry point when this subcommand is invoked.""" + + # TODO: Resolve path! + for note in args.notes: + sys.stdout.buffer.write(env.local_trove.get_root().child(note).read_content()) + + diff --git a/trovedb/fs.py b/trovedb/fs.py index 3f36792..97ba565 100644 --- a/trovedb/fs.py +++ b/trovedb/fs.py @@ -4,7 +4,9 @@ import tempfile import datetime as dt from pathlib import Path from typing import Optional, Dict, List, Self, Iterable -from .trove import Note, Trove, TreeNote, BlobNote, Blob, Tree, BadNoteType, TreeEntry, NoteNotFound + +from .trove import Note, Trove, TreeNote, BadNoteType, TreeEntry, NoteNotFound +from . 
import fs_util as fsu class FSNote(Note): @@ -61,22 +63,17 @@ class FSNote(Note): def set_raw_metadata(self, key: str, value: bytes) -> None: self._trove._set_metadata(self._inode, key, value) -class FSBlobNote(FSNote, BlobNote): - def read(self) -> bytes: - if self._inode is None: - return b"" - return self._path.read_bytes() + def read_content(self) -> bytes: + """Read the raw content of the note.""" + content_file = fsu.get_content_path(self._path) + if content_file.exists(): + return content_file.read_bytes() + return b"" - def write(self, data: bytes) -> None: - self._path.write_bytes(data) - # Update cache just in case inode changed (some editors do this) - try: - new_inode = self._path.stat().st_ino - if new_inode != self._inode: - self._trove._update_cache(new_inode, self._path) - self._inode = new_inode - except OSError: - pass + def write_content(self, data:bytes) -> None: + """Write the raw content of the note.""" + content_file = fsu.get_content_path(self._path) + content_file.write_bytes(data) class FSTreeNote(FSNote, TreeNote): @property @@ -85,7 +82,7 @@ class FSTreeNote(FSNote, TreeNote): return "inode/directory" def link(self, name: str, note: Note): - if not isinstance(note, FSBlobNote): + if not isinstance(note, FSNote): raise BadNoteType("Only blob notes can be linked") target_path = self._path / name @@ -239,7 +236,7 @@ class FSTrove(Trove): if target_path.is_dir(): return FSTreeNote(self, inode=note_id, path=target_path) else: - return FSBlobNote(self, inode=note_id, path=target_path) + return FSNote(self, inode=note_id, path=target_path) def get_raw_note(self, note_id: int) -> Note: p = self.get_path_by_inode(note_id) @@ -248,7 +245,7 @@ class FSTrove(Trove): return self.get_raw_note_by_path(p) - def create_blob(self, data: bytes | None = None) -> BlobNote: + def create_blob(self, data: bytes | None = None) -> Note: fd, temp_path = tempfile.mkstemp(dir=self.working) try: if data: @@ -258,7 +255,7 @@ class FSTrove(Trove): p = Path(temp_path) 
inode = p.stat().st_ino self._update_cache(inode, p) - return FSBlobNote(self, inode=inode, path=p) + return FSNote(self, inode=inode, path=p) def get_root(self) -> TreeNote: return FSTreeNote(self, inode=self._root_inode, path=self.root) diff --git a/trovedb/fs_util.py b/trovedb/fs_util.py new file mode 100644 index 0000000..2654be7 --- /dev/null +++ b/trovedb/fs_util.py @@ -0,0 +1,200 @@ +""" +Filesystem utilities for interacting with Trove directory structures +""" + +import ctypes +import ctypes.util +import errno +import os +import platform +import tempfile +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# renameat2(2) ctypes binding — Linux only +# --------------------------------------------------------------------------- + +FS_LOCAL_SPECIAL = ':' + +_renameat2 = None +if platform.system() == "Linux": + try: + _libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True) + _renameat2_func = _libc.renameat2 + _renameat2_func.argtypes = [ + ctypes.c_int, # olddirfd + ctypes.c_char_p, # oldpath + ctypes.c_int, # newdirfd + ctypes.c_char_p, # newpath + ctypes.c_uint, # flags + ] + _renameat2_func.restype = ctypes.c_int + _renameat2 = _renameat2_func + except (OSError, AttributeError): + pass + +_AT_FDCWD = -100 +_RENAME_EXCHANGE = 2 + +# errnos that mean "not supported here" rather than a real failure +_NOT_SUPPORTED = frozenset((errno.EINVAL, errno.ENOSYS, errno.EOPNOTSUPP)) +_HARDLINK_UNAVAIL = _NOT_SUPPORTED | frozenset((errno.EXDEV, errno.EPERM)) + +# --------------------------------------------------------------------------- +# Low-level primitives +# --------------------------------------------------------------------------- + +class _OpUnavailable(Exception): + pass + +def _rename_exchange(old: str | Path, new: str | Path) -> None: + """Atomically swap two filesystem entries. + + Both paths must exist. 
Raises _OpUnavailable if renameat2 is not + available or not supported, OSError on kernel/fs failure. + """ + if _renameat2 is None: + raise _OpUnavailable("renameat2 not available on this platform") + ret = _renameat2( + _AT_FDCWD, os.fsencode(str(old)), + _AT_FDCWD, os.fsencode(str(new)), + _RENAME_EXCHANGE, + ) + if ret != 0: + err = ctypes.get_errno() + if err in _NOT_SUPPORTED: + raise _OpUnavailable(f"renameat2 exchange not supported on filesystem") + + raise OSError(err, os.strerror(err), str(old), None, str(new)) + + +def _mklink_checked(src: Path, dest: Path) -> bool: + # Try to hard-link src to dest; False means hard links are unavailable + try: + os.link(str(src), str(dest)) + return True + except OSError as e: + if e.errno in _HARDLINK_UNAVAIL: + return False + raise + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def get_content_index_name_from_path(path: Path) -> str: + """Return the name of the index file for a directory.""" + # TODO: improve handling and mimetype logic + if not path.suffix: + return '_index.dat' + return f'_index{path.suffix}' + +def get_content_path(path: str | Path) -> Path: + """Return the path to the content file for a directory or file""" + path = Path(path).resolve() + if path.is_dir(): + return path / get_content_index_name_from_path(path) + return path + +def promote_path(path: str | Path) -> Path: + """Promote a regular file to a directory, preserving content as an index file. + + Transforms ``/some/dir/notes.md`` into:: + + /some/dir/notes.md/ + _index.md ← original content (suffix preserved) + + Atomicity depends on platform and filesystem capabilities, with + automatic fallback through three tiers: + + 1. hardlink + renameat2(RENAME_EXCHANGE) — observers see either + the old file or the new directory, never absence. Requires + Linux >=3.15 and a supporting filesystem (ext4, xfs, tmpfs). + A temporary entry briefly appears in the parent directory. + + 2. 
hardlink + unlink + rename — data is preserved via hard link + so content is never at risk, but observers see a brief window + where the original name is absent. + + 3. move + rename — works on any POSIX system. Same brief + absence window. + + A temporary directory (``NAME:RANDOM:trove_promote_tmp``) is + created in the parent for the duration of the operation. On success + it is renamed into place (or, in tier 1, its name ends up on the + original file and is unlinked). If the final rename fails in tier 3, + the temp directory is left in place for recovery later. + + Args: + path: Regular file to promote. Must exist and be a regular file. + + Returns: + Path to the index file inside the resulting directory + (e.g., ``path / _index.md``). + + Raises: + ValueError: *path* is not a regular file. + OSError: Promotion failed. In tiers 1-2, the original file is + restored. In tier 3, the temp directory may remain for + manual or automated recovery. + """ + path = Path(path).resolve() + if not path.is_file(): + raise ValueError(f"not a regular file: {path}") + + # Create a temporary directory + target_path_tmp = Path(tempfile.mkdtemp(dir=str(path.parent.absolute()), prefix=f"{path.name}{FS_LOCAL_SPECIAL}", suffix=f"{FS_LOCAL_SPECIAL}trove_promote_tmp")) + target_name = get_content_index_name_from_path(path) + target_path = path / target_name + + # Attempt to preserve original file during operation via hardlink + # This is the 'mostly' safe method + try: + hardlink_created = _mklink_checked(path, target_path_tmp / target_name) + except OSError: + try: + os.rmdir(target_path_tmp) + except OSError: + logger.warning("Failed to remove temporary file: %s", target_path_tmp) + raise + + if hardlink_created: + try: + _rename_exchange(path, target_path_tmp) + except _OpUnavailable: + # Exchanging isn't supported, unlink and rename instead + # This results in a 'blip' to observers of the filesystem + os.unlink(path) + os.rename(target_path_tmp, path) + return target_path + try: + 
os.unlink(target_path_tmp) + except OSError: + # Failed to delete hard link to the original file. This means the temporary + # path still exists, but we can't do much so ignore. + logger.error("Failed to remove temporary file: %s", target_path_tmp) + return target_path + + # Hard linking isn't an option, try dual move + # If the first step fails, the original file is still there and we attempt to + # delete the temporary directory before passing along the error. + try: + os.rename(path, target_path_tmp / target_name) + except OSError: + try: + os.rmdir(target_path_tmp) + except OSError: + logger.warning("Failed to remove temporary file: %s", target_path_tmp) + raise + + # Move back to the original path now + # If this fails, trove scan of the path will be able to tell what happened + # and present user an option for recovery. Failure here indicates something + # _very_ wrong - we created the new tmp path and removed the old path + # without error. We let the error propagate intentionally to avoid further damage. 
+ os.rename(target_path_tmp, path) + return target_path diff --git a/trovedb/mime.py b/trovedb/mime.py new file mode 100644 index 0000000..229c42d --- /dev/null +++ b/trovedb/mime.py @@ -0,0 +1,6 @@ +from typing import NamedTuple + +EXTENSIONS_MAP = { + "text/plain": ".txt", + "text/markdown": ".md", +} diff --git a/trovedb/qgui/tool_basic_editor.py b/trovedb/qgui/tool_basic_editor.py index 530b88c..8c0f408 100644 --- a/trovedb/qgui/tool_basic_editor.py +++ b/trovedb/qgui/tool_basic_editor.py @@ -18,11 +18,5 @@ class ToolBasicEditor(Tool): layout.addWidget(self._text_edit) self.refresh() - def _refresh_blob(self, note: tr.Blob): - self._text_edit.setPlainText(note.read().decode("utf-8")) - def refresh(self): - if isinstance(self.note, tr.Blob): - self._refresh_blob(cast(tr.Blob, self.note)) - - + self._text_edit.setPlainText(self.note.read_content().decode("utf-8")) diff --git a/trovedb/trove.py b/trovedb/trove.py index 924331f..a3a47f8 100644 --- a/trovedb/trove.py +++ b/trovedb/trove.py @@ -56,14 +56,12 @@ class Note(Protocol): """Set metadata value for the given key.""" ... -@runtime_checkable -class Blob(Protocol): - def read(self) -> bytes: + def read_content(self) -> bytes: """Read the raw content of the note.""" ... - def write(self, data: bytes) -> None: - """Write new content to the note.""" + def write_content(self, data:bytes) -> None: + """Write the raw content of the note.""" ... @@ -101,10 +99,6 @@ class Tree(Protocol): """Return all entries as {name: object_id}.""" ... -@runtime_checkable -class BlobNote(Note, Blob, Protocol): - """Blob Note""" - @runtime_checkable class TreeNote(Note, Tree, Protocol): """Tree Note""" @@ -121,7 +115,7 @@ class Trove(Protocol): """Retrieve a note by a object id""" ... - def create_blob(self, data: bytes | None = None) -> BlobNote: + def create_blob(self, data: bytes | None = None) -> Note: """Create a new blob node at the given path with content""" ... 
diff --git a/trovedb/trovedb.py b/trovedb/trovedb.py index 435ac64..77aaa23 100644 --- a/trovedb/trovedb.py +++ b/trovedb/trovedb.py @@ -13,7 +13,7 @@ from .db import Sqlite3Trove, NOTE_ROOT_ID from . import trove as tr -from .trove import Note, Trove, TreeNote, BlobNote, TreeEntry, NoteNotFound, ObjectId +from .trove import Note, Trove, TreeNote, TreeEntry, NoteNotFound, ObjectId class NoteImpl(Note): @@ -50,16 +50,11 @@ class NoteImpl(Note): def set_raw_metadata(self, key: str, value: bytes) -> None: self._db.write_metadata(self._object_id, key, value) - -class BlobNoteImpl(NoteImpl, BlobNote): - """Concrete BlobNote: a blob object in the store with metadata access.""" - - # Blob protocol - def read(self) -> bytes: + def read_content(self) -> bytes: data = self._db.read_object(self._object_id) return data if data is not None else b"" - def write(self, data: bytes) -> None: + def write_content(self, data: bytes) -> None: self._db.write_content(self._object_id, data) @@ -147,13 +142,13 @@ class TroveImpl: raise NoteNotFound(note_id) if self._db.is_tree(note_id) or info.type == "inode/directory": return TreeNoteImpl(self, note_id) - return BlobNoteImpl(self, note_id) + return NoteImpl(self, note_id) def create_blob(self, data: bytes | None = None, - dtype: str = "application/octet-stream") -> BlobNote: + dtype: str = "application/octet-stream") -> Note: """Create a new blob object and return a BlobNote for it.""" obj_id = self._db.write_blob(data or b"", dtype=dtype) - return BlobNoteImpl(self, obj_id) + return NoteImpl(self, obj_id) def get_root(self) -> TreeNote: """Return the root TreeNote (always id=NOTE_ROOT_ID)."""