use std::net::SocketAddr;
use std::path::PathBuf;
use std::process::Command;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use clap::Parser;
use ontoref_daemon::actors::ActorRegistry;
use ontoref_daemon::api::{self, AppState};
use ontoref_daemon::watcher::{FileWatcher, WatcherDeps};
use tokio::net::TcpListener;
use tokio::sync::watch;
use tower_http::trace::TraceLayer;
use tracing::{error, info, warn};
/// Read and apply bootstrap config from stdin (ADR-004: NCL pipe bootstrap).
///
/// Reads all of stdin as JSON, applies top-level values to CLI defaults, then
/// redirects stdin to /dev/null so the daemon's event loop does not block on
/// it. Returns the full parsed JSON for downstream consumers (e.g. NATS init);
/// callers extract feature-gated sections (e.g. projects) after this returns.
/// Aborts with exit(1) if stdin is not a pipe or JSON is invalid.
fn apply_stdin_config(cli: &mut Cli) -> serde_json::Value {
    use std::io::{IsTerminal, Read};
    // A TTY on stdin means the user forgot the pipe — fail fast with usage.
    if std::io::stdin().is_terminal() {
        eprintln!(
            "error: --config-stdin requires a pipe. Usage:\n nickel export --format json \
config.ncl | ontoref-daemon --config-stdin"
        );
        std::process::exit(1);
    }
    let mut buf = String::new();
    if let Err(e) = std::io::stdin().read_to_string(&mut buf) {
        eprintln!("error: failed to read stdin config: {e}");
        std::process::exit(1);
    }
    // Invalid JSON aborts before any daemon state is created (message says
    // "daemon not started" deliberately).
    let json: serde_json::Value = match serde_json::from_str(&buf) {
        Ok(v) => v,
        Err(e) => {
            eprintln!("error: --config-stdin: invalid JSON — daemon not started: {e}");
            std::process::exit(1);
        }
    };
    // daemon port
    if let Some(port) = json
        .get("daemon")
        .and_then(|d| d.get("port"))
        .and_then(|p| p.as_u64())
    {
        cli.port = port as u16;
    }
    // db credentials — only applied when enabled = true.
    // A value already supplied on the CLI wins over the piped config
    // (is_none() guards on url/namespace).
    #[cfg(feature = "db")]
    if let Some(db) = json.get("db").and_then(|d| d.as_object()) {
        let db_enabled = db.get("enabled").and_then(|e| e.as_bool()).unwrap_or(false);
        if db_enabled && cli.db_url.is_none() {
            if let Some(url) = db.get("url").and_then(|u| u.as_str()) {
                if !url.is_empty() {
                    cli.db_url = Some(url.to_string());
                }
            }
        }
        if cli.db_namespace.is_none() {
            if let Some(ns) = db.get("namespace").and_then(|n| n.as_str()) {
                if !ns.is_empty() {
                    cli.db_namespace = Some(ns.to_string());
                }
            }
        }
        // username/password have CLI defaults ("root"), so non-empty config
        // values always replace them.
        if let Some(user) = db.get("username").and_then(|u| u.as_str()) {
            if !user.is_empty() {
                cli.db_username = user.to_string();
            }
        }
        if let Some(pass) = db.get("password").and_then(|p| p.as_str()) {
            if !pass.is_empty() {
                cli.db_password = pass.to_string();
            }
        }
    }
    // ui paths
    #[cfg(feature = "ui")]
    apply_ui_config(cli, &json);
    tracing::info!("config loaded from stdin (ADR-004 NCL pipe bootstrap)");
    // Release stdin — daemon must not block on it during normal operation.
    #[cfg(unix)]
    if let Ok(devnull) = std::fs::File::open("/dev/null") {
        use std::os::unix::io::IntoRawFd;
        let fd = devnull.into_raw_fd();
        // SAFETY: fd is a freshly opened, owned descriptor; dup2 replaces
        // fd 0 (stdin) with it, then the original fd is closed. No other
        // code holds this descriptor.
        unsafe {
            libc::dup2(fd, 0);
            libc::close(fd);
        }
    }
    json
}
2026-03-13 00:18:14 +00:00
/// Load daemon config from .ontoref/config.ncl and override CLI defaults.
/// Returns (NICKEL_IMPORT_PATH, parsed config JSON) — both optional.
fn load_config_overrides(cli: &mut Cli) -> (Option<String>, Option<serde_json::Value>) {
2026-03-13 00:18:14 +00:00
let config_path = cli.project_root.join(".ontoref").join("config.ncl");
if !config_path.exists() {
return (None, None);
2026-03-13 00:18:14 +00:00
}
let output = match Command::new("nickel")
.arg("export")
.arg(&config_path)
.output()
{
Ok(o) => o,
Err(e) => {
warn!(error = %e, path = %config_path.display(), "failed to read config");
return (None, None);
2026-03-13 00:18:14 +00:00
}
};
if !output.status.success() {
warn!("nickel export failed for config");
return (None, None);
2026-03-13 00:18:14 +00:00
}
let config_json: serde_json::Value = match serde_json::from_slice(&output.stdout) {
Ok(v) => v,
Err(e) => {
warn!(error = %e, "failed to parse config JSON");
return (None, None);
2026-03-13 00:18:14 +00:00
}
};
// Extract daemon config
if let Some(daemon) = config_json.get("daemon").and_then(|d| d.as_object()) {
if let Some(port) = daemon.get("port").and_then(|p| p.as_u64()) {
cli.port = port as u16;
}
if let Some(timeout) = daemon.get("idle_timeout").and_then(|t| t.as_u64()) {
cli.idle_timeout = timeout;
}
if let Some(interval) = daemon.get("invalidation_interval").and_then(|i| i.as_u64()) {
cli.invalidation_interval = interval;
}
if let Some(sweep) = daemon.get("actor_sweep_interval").and_then(|s| s.as_u64()) {
cli.actor_sweep_interval = sweep;
}
if let Some(stale) = daemon.get("actor_stale_timeout").and_then(|s| s.as_u64()) {
cli.actor_stale_timeout = stale;
}
if let Some(max) = daemon.get("max_notifications").and_then(|m| m.as_u64()) {
cli.max_notifications = max as usize;
}
if let Some(ack_dirs) = daemon
.get("notification_ack_required")
.and_then(|a| a.as_array())
{
cli.notification_ack_required = ack_dirs
.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect();
}
}
// Extract db config — only when enabled = true
2026-03-13 00:18:14 +00:00
#[cfg(feature = "db")]
if let Some(db) = config_json.get("db").and_then(|d| d.as_object()) {
let db_enabled = db.get("enabled").and_then(|e| e.as_bool()).unwrap_or(false);
if db_enabled {
cli.db_url = db
.get("url")
.and_then(|u| u.as_str())
.filter(|s| !s.is_empty())
.map(str::to_string);
cli.db_namespace = db
.get("namespace")
.and_then(|n| n.as_str())
.filter(|s| !s.is_empty())
.map(str::to_string);
if let Some(user) = db
.get("username")
.and_then(|u| u.as_str())
.filter(|s| !s.is_empty())
{
cli.db_username = user.to_string();
2026-03-13 00:18:14 +00:00
}
if let Some(pass) = db
.get("password")
.and_then(|p| p.as_str())
.filter(|s| !s.is_empty())
{
cli.db_password = pass.to_string();
2026-03-13 00:18:14 +00:00
}
}
}
// Env var overrides for DB credentials — not persisted to disk.
#[cfg(feature = "db")]
{
if let Ok(user) = std::env::var("ONTOREF_DB_USERNAME") {
if !user.is_empty() {
cli.db_username = user;
}
}
if let Ok(pass) = std::env::var("ONTOREF_DB_PASSWORD") {
if !pass.is_empty() {
cli.db_password = pass;
}
}
}
// UI config section — only populates fields not already set via CLI.
#[cfg(feature = "ui")]
apply_ui_config(cli, &config_json);
info!("config loaded from {}", config_path.display());
let import_path = config_json
.get("nickel_import_paths")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str())
.collect::<Vec<_>>()
.join(":")
})
.filter(|s| !s.is_empty());
(import_path, Some(config_json))
2026-03-13 00:18:14 +00:00
}
/// Command-line interface for the daemon. Most values can be overridden by
/// `.ontoref/config.ncl` or (with --config-stdin) by piped JSON; the doc
/// comment on each field notes which ones.
#[derive(Parser)]
#[command(name = "ontoref-daemon", about = "Ontoref cache daemon")]
struct Cli {
    /// Project root directory (where .ontoref/config.ncl lives)
    #[arg(long, default_value = ".")]
    project_root: PathBuf,
    /// Stratumiops root directory (for shared schemas/modules)
    #[arg(long)]
    ontoref_root: Option<PathBuf>,
    /// Path to the ontoref config directory (e.g. ~/.config/ontoref).
    /// Used to persist runtime key overrides in keys-overlay.json.
    /// Set automatically by ontoref-daemon-boot.
    #[arg(long)]
    config_dir: Option<PathBuf>,
    /// HTTP listen port (overridden by config if present)
    #[arg(long, default_value_t = 7891)]
    port: u16,
    /// Seconds of inactivity before auto-shutdown (overridden by config)
    #[arg(long, default_value_t = 1800)]
    idle_timeout: u64,
    /// Full cache invalidation interval in seconds (overridden by config)
    #[arg(long, default_value_t = 60)]
    invalidation_interval: u64,
    /// PID file path
    #[arg(long)]
    pid_file: Option<PathBuf>,
    /// Actor sweep interval in seconds (reap stale sessions)
    #[arg(long, default_value_t = 30)]
    actor_sweep_interval: u64,
    /// Seconds before a remote actor (no `kill -0` check) is considered stale
    #[arg(long, default_value_t = 120)]
    actor_stale_timeout: u64,
    /// Maximum notifications to retain per project (ring buffer)
    #[arg(long, default_value_t = 1000)]
    max_notifications: usize,
    /// Directories requiring notification acknowledgment before commit
    #[arg(long, value_delimiter = ',')]
    notification_ack_required: Vec<String>,
    /// Directory containing Tera HTML templates for the web UI
    #[cfg(feature = "ui")]
    #[arg(long)]
    templates_dir: Option<PathBuf>,
    /// Directory to serve as /public (CSS, JS assets)
    #[cfg(feature = "ui")]
    #[arg(long)]
    public_dir: Option<PathBuf>,
    /// Path to registry.toml for multi-project mode
    #[cfg(feature = "ui")]
    #[arg(long)]
    registry: Option<PathBuf>,
    /// Read bootstrap config as JSON from stdin (piped from
    /// scripts/ontoref-daemon-start). Applies db, nats, ui, port values
    /// before project-level .ontoref/config.ncl overrides.
    /// Stdin is released to /dev/null after reading. See ADR-004.
    #[arg(long)]
    config_stdin: bool,
    /// Hash a password with argon2id and print the PHC string, then exit
    #[arg(long, value_name = "PASSWORD")]
    hash_password: Option<String>,
    /// Run as an MCP server over stdin/stdout (for Claude Desktop, Cursor,
    /// etc.). No HTTP server is started in this mode.
    #[cfg(feature = "mcp")]
    #[arg(long)]
    mcp_stdio: bool,
    /// TLS certificate file (PEM). Enables HTTPS when combined with --tls-key
    #[cfg(feature = "tls")]
    #[arg(long)]
    tls_cert: Option<PathBuf>,
    /// TLS private key file (PEM). Enables HTTPS when combined with --tls-cert
    #[cfg(feature = "tls")]
    #[arg(long)]
    tls_key: Option<PathBuf>,
    /// SurrealDB remote WebSocket URL (e.g., ws://127.0.0.1:8000)
    #[cfg(feature = "db")]
    #[arg(long)]
    db_url: Option<String>,
    /// SurrealDB namespace for this daemon instance
    #[cfg(feature = "db")]
    #[arg(long)]
    db_namespace: Option<String>,
    /// SurrealDB username
    #[cfg(feature = "db")]
    #[arg(long, default_value = "root")]
    db_username: String,
    /// SurrealDB password
    #[cfg(feature = "db")]
    #[arg(long, default_value = "root")]
    db_password: String,
}
/// Background task: receive newly registered projects and start a FileWatcher
/// for each. Watchers are inserted into `watcher_map` (shared with `AppState`)
/// so that `DELETE /projects/{slug}` can abort and drop them on demand.
async fn runtime_watcher_task(
mut rx: tokio::sync::mpsc::UnboundedReceiver<
std::sync::Arc<ontoref_daemon::registry::ProjectContext>,
>,
watcher_map: std::sync::Arc<
tokio::sync::Mutex<std::collections::HashMap<String, ontoref_daemon::watcher::FileWatcher>>,
>,
invalidation_interval: u64,
#[cfg(feature = "db")] db: Option<std::sync::Arc<stratum_db::StratumDb>>,
#[cfg(feature = "nats")] nats: Option<std::sync::Arc<ontoref_daemon::nats::NatsPublisher>>,
) {
while let Some(ctx) = rx.recv().await {
if ctx.push_only || ctx.root.as_os_str().is_empty() {
continue;
}
let deps = ontoref_daemon::watcher::WatcherDeps {
slug: ctx.slug.clone(),
#[cfg(feature = "db")]
db: db.clone(),
import_path: ctx.import_path.clone(),
notifications: std::sync::Arc::clone(&ctx.notifications),
actors: std::sync::Arc::clone(&ctx.actors),
#[cfg(feature = "nats")]
nats: nats.clone(),
seed_lock: std::sync::Arc::clone(&ctx.seed_lock),
ontology_version: std::sync::Arc::clone(&ctx.ontology_version),
};
match ontoref_daemon::watcher::FileWatcher::start(
&ctx.root,
std::sync::Arc::clone(&ctx.cache),
invalidation_interval,
deps,
) {
Ok(w) => {
info!(slug = %ctx.slug, "runtime project watcher started");
watcher_map.lock().await.insert(ctx.slug.clone(), w);
}
Err(e) => warn!(slug = %ctx.slug, error = %e, "runtime project watcher failed"),
}
}
}
/// Read `{config_dir}/keys-overlay.json` and push its key sets into the
/// registry.
///
/// The reserved slug `"_primary"` resolves to `registry.primary_slug()` so
/// callers never need the actual primary project name. Missing config dir or
/// missing/unreadable overlay file is a silent no-op; malformed JSON is
/// logged and skipped. Invoked at startup and again by `ConfigWatcher`
/// whenever the file changes.
fn apply_keys_overlay(
    config_dir: &Option<PathBuf>,
    registry: &ontoref_daemon::registry::ProjectRegistry,
) {
    let overlay_path = match config_dir.as_deref() {
        Some(dir) => dir.join("keys-overlay.json"),
        None => return,
    };
    let raw = match std::fs::read_to_string(&overlay_path) {
        Ok(contents) => contents,
        Err(_) => return,
    };
    type Overrides = std::collections::HashMap<String, Vec<ontoref_daemon::registry::KeyEntry>>;
    let overrides: Overrides = match serde_json::from_str(&raw) {
        Ok(map) => map,
        Err(_) => {
            warn!(path = %overlay_path.display(), "keys-overlay.json is not valid JSON — skipped");
            return;
        }
    };
    for (slug, keys) in overrides {
        let resolved = match slug.as_str() {
            "_primary" => registry.primary_slug().to_string(),
            _ => slug,
        };
        match registry.update_keys(&resolved, keys) {
            Some(_) => info!(%resolved, "keys overlay applied"),
            None => {
                warn!(%resolved, "keys overlay: no registered project with this slug — skipped")
            }
        }
    }
}
/// Daemon entry point: parse the CLI, load config (stdin pipe or project
/// NCL), build the primary project context + registry, start file/config/
/// template/drift watchers and optional DB/NATS integrations, then serve
/// HTTP (or HTTPS, or MCP-over-stdio) until idle timeout or fatal error.
#[tokio::main]
async fn main() {
    // Parse CLI first so we can redirect logs to stderr in stdio MCP mode.
    // In stdio mode stdout is the MCP JSON-RPC transport; any log line there
    // corrupts the framing and the client silently drops or errors.
    let mut cli = Cli::parse();
    #[cfg(feature = "mcp")]
    let use_stderr = cli.mcp_stdio;
    #[cfg(not(feature = "mcp"))]
    let use_stderr = false;
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| "ontoref_daemon=info,tower_http=debug".into());
    if use_stderr {
        tracing_subscriber::fmt()
            .with_env_filter(env_filter)
            .with_writer(std::io::stderr)
            .init();
    } else {
        tracing_subscriber::fmt().with_env_filter(env_filter).init();
    }
    // Utility mode: --hash-password prints an argon2id PHC string and exits.
    if let Some(ref password) = cli.hash_password {
        use argon2::{
            password_hash::{rand_core::OsRng, PasswordHasher, SaltString},
            Argon2,
        };
        let salt = SaltString::generate(&mut OsRng);
        let hash = Argon2::default()
            .hash_password(password.as_bytes(), &salt)
            .expect("argon2 hash failed")
            .to_string();
        println!("{hash}");
        return;
    }
    // Bootstrap config from stdin pipe (ADR-004).
    // When --config-stdin is set the stdin JSON is the authoritative config;
    // the project .ontoref/config.ncl is not read.
    let (nickel_import_path, loaded_config) = if cli.config_stdin {
        let json = apply_stdin_config(&mut cli);
        (None, Some(json))
    } else {
        load_config_overrides(&mut cli)
    };
    // Extract registered projects from the stdin config (service mode).
    let stdin_projects: Vec<ontoref_daemon::registry::RegistryEntry> = if cli.config_stdin {
        loaded_config
            .as_ref()
            .and_then(|j| j.get("projects"))
            .and_then(|p| serde_json::from_value(p.clone()).ok())
            .unwrap_or_default()
    } else {
        vec![]
    };
    if !stdin_projects.is_empty() {
        info!(
            count = stdin_projects.len(),
            "projects loaded from stdin config"
        );
    }
    // If templates/public dirs were not set by config or CLI, fall back to the
    // XDG share location installed by `just install-daemon`.
    #[cfg(feature = "ui")]
    {
        let xdg_share = std::env::var_os("HOME")
            .map(|home| std::path::PathBuf::from(home).join(".local/share/ontoref-daemon"));
        if cli.templates_dir.is_none() {
            let candidate = xdg_share.as_deref().map(|s| s.join("templates"));
            if candidate.as_deref().is_some_and(|p| p.exists()) {
                cli.templates_dir = candidate;
            }
        }
        if cli.public_dir.is_none() {
            let candidate = xdg_share.as_deref().map(|s| s.join("public"));
            if candidate.as_deref().is_some_and(|p| p.exists()) {
                cli.public_dir = candidate;
            }
        }
    }
    // Canonicalize and validate the project root before anything uses it.
    let project_root = match cli.project_root.canonicalize() {
        Ok(p) if p.is_dir() => p,
        Ok(p) => {
            error!(
                path = %p.display(),
                "project_root is not a directory — aborting"
            );
            std::process::exit(1);
        }
        Err(e) => {
            error!(
                path = %cli.project_root.display(),
                error = %e,
                "project_root does not exist or is inaccessible — aborting"
            );
            std::process::exit(1);
        }
    };
    info!(
        project_root = %project_root.display(),
        port = cli.port,
        idle_timeout = cli.idle_timeout,
        "starting ontoref-daemon"
    );
    // Derive primary slug from the project root directory name.
    let primary_slug = project_root
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("default")
        .to_string();
    // Build primary ProjectContext up-front so its Arcs (cache, actors,
    // notifications, seed_lock, ontology_version) can be aliased into AppState
    // and reused by the watcher before the registry is assembled.
    let ack_required = if cli.notification_ack_required.is_empty() {
        vec![".ontology".to_string(), "adrs".to_string()]
    } else {
        cli.notification_ack_required.clone()
    };
    let primary_ctx =
        ontoref_daemon::registry::make_context(ontoref_daemon::registry::ContextSpec {
            slug: primary_slug.clone(),
            root: project_root.clone(),
            import_path: nickel_import_path.clone(),
            keys: vec![], // populated from keys-overlay.json after registry is built
            remote_url: String::new(),
            push_only: false,
            stale_actor_timeout: cli.actor_stale_timeout,
            max_notifications: cli.max_notifications,
            ack_required,
        });
    // Alias the primary Arcs into local bindings for use before and after
    // the primary context is consumed into the registry.
    let cache = Arc::clone(&primary_ctx.cache);
    let actors = Arc::clone(&primary_ctx.actors);
    let notifications = Arc::clone(&primary_ctx.notifications);
    let primary_seed_lock = Arc::clone(&primary_ctx.seed_lock);
    let primary_ontology_arc = Arc::clone(&primary_ctx.ontology_version);
    #[cfg(feature = "ui")]
    let sessions = Arc::new(ontoref_daemon::session::SessionStore::new());
    // Initialize Tera template engine from the configured templates directory.
    // A Tera failure disables the UI but does not stop the daemon.
    #[cfg(feature = "ui")]
    let tera_instance: Option<Arc<tokio::sync::RwLock<tera::Tera>>> = {
        if let Some(ref tdir) = cli.templates_dir {
            let glob = format!("{}/**/*.html", tdir.display());
            match tera::Tera::new(&glob) {
                Ok(t) => {
                    info!(templates_dir = %tdir.display(), "Tera templates loaded");
                    Some(Arc::new(tokio::sync::RwLock::new(t)))
                }
                Err(e) => {
                    warn!(error = %e, templates_dir = %tdir.display(), "Tera init failed — UI disabled");
                    None
                }
            }
        } else {
            info!("--templates-dir not set — web UI disabled");
            None
        }
    };
    // Optional DB connection with health check
    #[cfg(feature = "db")]
    let db = {
        if cli.db_url.is_some() {
            info!(url = %cli.db_url.as_deref().unwrap_or(""), "connecting to SurrealDB...");
            connect_db(&cli).await
        } else {
            info!("SurrealDB not configured — running cache-only");
            None
        }
    };
    // Seed ontology tables from local NCL files → DB projection.
    #[cfg(feature = "db")]
    {
        if let Some(ref db) = db {
            info!("seeding ontology tables from local files...");
            ontoref_daemon::seed::seed_ontology(
                db,
                &primary_slug,
                &project_root,
                &cache,
                nickel_import_path.as_deref(),
            )
            .await;
        }
    }
    // Initialize NATS publisher
    #[cfg(feature = "nats")]
    let nats_publisher = {
        let project_name = project_root
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown")
            .to_string();
        match ontoref_daemon::nats::NatsPublisher::connect(
            loaded_config.as_ref(),
            project_name,
            cli.port,
        )
        .await
        {
            Ok(Some(pub_)) => {
                info!("NATS publisher initialized");
                Some(Arc::new(pub_))
            }
            Ok(None) => {
                info!("NATS disabled or unavailable");
                None
            }
            Err(e) => {
                warn!(error = %e, "NATS initialization failed");
                None
            }
        }
    };
    // Start file watcher for the primary project — after DB so it can re-seed on
    // changes. Seed lock and ontology version come from the primary
    // ProjectContext.
    let watcher_deps = WatcherDeps {
        slug: primary_slug.clone(),
        #[cfg(feature = "db")]
        db: db.clone(),
        import_path: nickel_import_path.clone(),
        notifications: Arc::clone(&notifications),
        actors: Arc::clone(&actors),
        #[cfg(feature = "nats")]
        nats: nats_publisher.clone(),
        seed_lock: Arc::clone(&primary_seed_lock),
        ontology_version: Arc::clone(&primary_ontology_arc),
    };
    // Watcher failure is non-fatal: the daemon runs without auto-invalidation.
    let _watcher = match FileWatcher::start(
        &project_root,
        Arc::clone(&cache),
        cli.invalidation_interval,
        watcher_deps,
    ) {
        Ok(w) => Some(w),
        Err(e) => {
            error!(error = %e, "file watcher failed to start — running without auto-invalidation");
            None
        }
    };
    // Build registry — always present. Primary project is the first entry;
    // additional service-mode projects (from stdin config) follow.
    let stdin_projects_count = stdin_projects.len();
    let registry = Arc::new(ontoref_daemon::registry::ProjectRegistry::with_primary(
        primary_slug.clone(),
        Arc::new(primary_ctx),
        stdin_projects,
        cli.actor_stale_timeout,
        cli.max_notifications,
    ));
    if stdin_projects_count > 0 {
        info!(
            extra_projects = stdin_projects_count,
            "registry built from stdin config (service mode)"
        );
    }
    // Load persisted key overrides from keys-overlay.json and apply to registry.
    // The reserved slug "_primary" maps to the primary project's key set.
    apply_keys_overlay(&cli.config_dir, &registry);
    // Shared watcher map — keyed by project slug.
    // `DELETE /projects/{slug}` removes the entry to abort and drop the watcher.
    // Runtime-added projects are inserted by `runtime_watcher_task`.
    let watcher_map: Arc<tokio::sync::Mutex<std::collections::HashMap<String, FileWatcher>>> =
        Arc::new(tokio::sync::Mutex::new(std::collections::HashMap::new()));
    // Start one watcher per local registered project (excluding the primary,
    // which is already watched above). Remote projects (push_only = true) have
    // no local filesystem to watch.
    {
        let mut map = watcher_map.lock().await;
        for ctx in registry.all().into_iter().filter(|ctx| {
            !ctx.push_only && !ctx.root.as_os_str().is_empty() && ctx.slug != primary_slug
        }) {
            let deps = WatcherDeps {
                slug: ctx.slug.clone(),
                #[cfg(feature = "db")]
                db: db.clone(),
                import_path: ctx.import_path.clone(),
                notifications: Arc::clone(&ctx.notifications),
                actors: Arc::clone(&ctx.actors),
                #[cfg(feature = "nats")]
                nats: nats_publisher.clone(),
                seed_lock: Arc::clone(&ctx.seed_lock),
                ontology_version: Arc::clone(&ctx.ontology_version),
            };
            match FileWatcher::start(
                &ctx.root,
                Arc::clone(&ctx.cache),
                cli.invalidation_interval,
                deps,
            ) {
                Ok(w) => {
                    info!(slug = %ctx.slug, root = %ctx.root.display(), "registry project watcher started");
                    map.insert(ctx.slug.clone(), w);
                }
                Err(e) => {
                    warn!(slug = %ctx.slug, error = %e, "registry project watcher failed to start");
                }
            }
        }
    }
    // Idle tracking starts "now" (epoch seconds) so a quiet daemon still
    // honors the timeout from boot.
    let epoch_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let last_activity = Arc::new(AtomicU64::new(epoch_secs));
    // Compute bind address before AppState so rate-limiter enabled flag is
    // available.
    let addr = std::net::SocketAddr::from(([127, 0, 0, 1], cli.port));
    // Channel for starting file watchers for runtime-added projects (#3).
    let (new_project_tx, new_project_rx) = tokio::sync::mpsc::unbounded_channel::<
        std::sync::Arc<ontoref_daemon::registry::ProjectContext>,
    >();
    let new_project_tx = std::sync::Arc::new(new_project_tx);
    let config_dir = cli.config_dir.clone();
    // Load daemon-level admin token hash from env.
    // ONTOREF_ADMIN_TOKEN_FILE (path to hash file) takes precedence over
    // ONTOREF_ADMIN_TOKEN (inline hash). The hash is an argon2id PHC string
    // generated with: ontoref-daemon --hash-password <password>
    #[cfg(feature = "ui")]
    let daemon_admin_hash: Option<String> = {
        if let Ok(path) = std::env::var("ONTOREF_ADMIN_TOKEN_FILE") {
            match std::fs::read_to_string(path.trim()) {
                Ok(h) => {
                    let h = h.trim().to_string();
                    info!("daemon admin token loaded from ONTOREF_ADMIN_TOKEN_FILE");
                    Some(h)
                }
                Err(e) => {
                    warn!(error = %e, "ONTOREF_ADMIN_TOKEN_FILE set but unreadable — manage auth disabled");
                    None
                }
            }
        } else if let Ok(h) = std::env::var("ONTOREF_ADMIN_TOKEN") {
            info!("daemon admin token loaded from ONTOREF_ADMIN_TOKEN");
            Some(h.trim().to_string())
        } else {
            None
        }
    };
    // Capture display values before they are moved into AppState.
    #[cfg(feature = "ui")]
    let project_root_str = project_root.display().to_string();
    #[cfg(feature = "ui")]
    let ui_startup: Option<(String, String)> = cli.templates_dir.as_ref().map(|tdir| {
        let public = cli
            .public_dir
            .as_ref()
            .map(|p| p.display().to_string())
            .unwrap_or_else(|| "".to_string());
        (tdir.display().to_string(), public)
    });
    // Two cfg-gated AppState literals: the `nats` feature adds the `nats`
    // field, so the struct must be built in feature-specific arms.
    let state = {
        #[cfg(feature = "nats")]
        {
            AppState {
                cache,
                project_root,
                ontoref_root: cli.ontoref_root,
                started_at: Instant::now(),
                last_activity: Arc::clone(&last_activity),
                actors: Arc::clone(&actors),
                notifications: Arc::clone(&notifications),
                nickel_import_path: nickel_import_path.clone(),
                #[cfg(feature = "db")]
                db,
                nats: nats_publisher.clone(),
                #[cfg(feature = "ui")]
                tera: tera_instance,
                #[cfg(feature = "ui")]
                public_dir: cli.public_dir,
                registry: Arc::clone(&registry),
                new_project_tx: Some(Arc::clone(&new_project_tx)),
                config_dir: config_dir.clone(),
                #[cfg(feature = "ui")]
                sessions: Arc::clone(&sessions),
                #[cfg(feature = "mcp")]
                mcp_current_project: Arc::new(std::sync::RwLock::new(None)),
                watcher_map: Arc::clone(&watcher_map),
                auth_rate_limiter: Arc::new(ontoref_daemon::api::AuthRateLimiter::new(
                    !addr.ip().is_loopback(),
                )),
                #[cfg(feature = "ui")]
                ncl_write_lock: Arc::new(ontoref_daemon::ui::ncl_write::NclWriteLock::new()),
                #[cfg(feature = "ui")]
                daemon_admin_hash: daemon_admin_hash.clone(),
            }
        }
        #[cfg(not(feature = "nats"))]
        {
            AppState {
                cache,
                project_root,
                ontoref_root: cli.ontoref_root,
                started_at: Instant::now(),
                last_activity: Arc::clone(&last_activity),
                actors: Arc::clone(&actors),
                notifications: Arc::clone(&notifications),
                nickel_import_path: nickel_import_path.clone(),
                #[cfg(feature = "db")]
                db,
                #[cfg(feature = "ui")]
                tera: tera_instance,
                #[cfg(feature = "ui")]
                public_dir: cli.public_dir,
                registry: Arc::clone(&registry),
                new_project_tx: Some(Arc::clone(&new_project_tx)),
                config_dir: config_dir.clone(),
                #[cfg(feature = "ui")]
                sessions: Arc::clone(&sessions),
                #[cfg(feature = "mcp")]
                mcp_current_project: Arc::new(std::sync::RwLock::new(None)),
                watcher_map: Arc::clone(&watcher_map),
                auth_rate_limiter: Arc::new(ontoref_daemon::api::AuthRateLimiter::new(
                    !addr.ip().is_loopback(),
                )),
                #[cfg(feature = "ui")]
                ncl_write_lock: Arc::new(ontoref_daemon::ui::ncl_write::NclWriteLock::new()),
                #[cfg(feature = "ui")]
                daemon_admin_hash,
            }
        }
    };
    // Runtime watcher task: start a FileWatcher for every project added after boot.
    {
        let invalidation_interval = cli.invalidation_interval;
        #[cfg(feature = "db")]
        let runtime_db = state.db.clone();
        #[cfg(feature = "nats")]
        let runtime_nats = state.nats.clone();
        tokio::spawn(runtime_watcher_task(
            new_project_rx,
            Arc::clone(&watcher_map),
            invalidation_interval,
            #[cfg(feature = "db")]
            runtime_db,
            #[cfg(feature = "nats")]
            runtime_nats,
        ));
    }
    // Start config hot-reload watcher — watches keys-overlay.json and re-applies
    // credentials for registry projects and the primary project on every change.
    let _config_watcher = if let Some(ref dir) = cli.config_dir {
        let overlay_path = dir.join("keys-overlay.json");
        match ontoref_daemon::watcher::ConfigWatcher::start(overlay_path, Arc::clone(&registry)) {
            Ok(w) => {
                info!("config hot-reload watcher started");
                Some(w)
            }
            Err(e) => {
                warn!(error = %e, "config watcher failed to start — credentials require restart to reload");
                None
            }
        }
    } else {
        None
    };
    // Start template hot-reload watcher if templates dir is configured.
    #[cfg(feature = "ui")]
    let _template_watcher = {
        if let (Some(ref tdir), Some(ref tera)) = (&cli.templates_dir, &state.tera) {
            match ontoref_daemon::ui::TemplateWatcher::start(tdir, Arc::clone(tera)) {
                Ok(w) => Some(w),
                Err(e) => {
                    warn!(error = %e, "template watcher failed to start — hot-reload disabled");
                    None
                }
            }
        } else {
            None
        }
    };
    // Start passive drift observer (scan+diff, no apply).
    #[cfg(feature = "ui")]
    let _drift_watcher = {
        let project_name = state.default_project_name();
        let notif_store = Arc::clone(&state.notifications);
        match ontoref_daemon::ui::DriftWatcher::start(
            &state.project_root,
            project_name,
            notif_store,
        ) {
            Ok(w) => {
                info!("drift watcher started");
                Some(w)
            }
            Err(e) => {
                warn!(error = %e, "drift watcher failed to start");
                None
            }
        }
    };
    // MCP stdio mode — skips HTTP entirely; serves stdin/stdout to AI client.
    #[cfg(feature = "mcp")]
    if cli.mcp_stdio {
        if let Err(e) = ontoref_daemon::mcp::serve_stdio(state).await {
            error!(error = %e, "MCP stdio server error");
            std::process::exit(1);
        }
        return;
    }
    let app = api::router(state).layer(TraceLayer::new_for_http().make_span_with(
        |req: &axum::http::Request<_>| {
            // Health-check endpoints are polled frequently — log at TRACE to
            // keep DEBUG output free of noise.
            if req.uri().path() == "/health" {
                tracing::trace_span!(
                    "request",
                    method = %req.method(),
                    uri = %req.uri(),
                    version = ?req.version(),
                )
            } else {
                tracing::debug_span!(
                    "request",
                    method = %req.method(),
                    uri = %req.uri(),
                    version = ?req.version(),
                )
            }
        },
    ));
    let listener = TcpListener::bind(addr).await.unwrap_or_else(|e| {
        error!(addr = %addr, error = %e, "failed to bind");
        std::process::exit(1);
    });
    // Write PID file only after successful bind
    if let Some(ref pid_path) = cli.pid_file {
        if let Err(e) = write_pid_file(pid_path) {
            error!(path = %pid_path.display(), error = %e, "failed to write PID file");
        }
    }
    info!(addr = %addr, "listening");
    #[cfg(feature = "ui")]
    if let Some((ref tdir, ref public)) = ui_startup {
        #[cfg(feature = "tls")]
        let scheme = if cli.tls_cert.is_some() && cli.tls_key.is_some() {
            "https"
        } else {
            "http"
        };
        #[cfg(not(feature = "tls"))]
        let scheme = "http";
        info!(
            url = %format!("{scheme}://{addr}/ui/"),
            project_root = %project_root_str,
            templates_dir = %tdir,
            public_dir = %public,
            "web UI available"
        );
    }
    // Publish daemon.started event
    #[cfg(feature = "nats")]
    {
        if let Some(ref nats) = nats_publisher {
            if let Err(e) = nats.publish_started().await {
                warn!(error = %e, "failed to publish daemon.started event");
            }
        }
    }
    // Spawn NATS event polling handler if enabled
    #[cfg(feature = "nats")]
    let _nats_handler = if let Some(ref nats) = nats_publisher {
        let nats_clone = Arc::clone(nats);
        let handle = tokio::spawn(async move {
            handle_nats_events(nats_clone).await;
        });
        Some(handle)
    } else {
        None
    };
    // Spawn actor sweep task — reaps stale sessions periodically
    let sweep_actors = Arc::clone(&actors);
    #[cfg(feature = "nats")]
    let sweep_nats = nats_publisher.clone();
    let sweep_interval = cli.actor_sweep_interval;
    let _sweep_task = tokio::spawn(async move {
        actor_sweep_loop(
            sweep_actors,
            sweep_interval,
            #[cfg(feature = "nats")]
            sweep_nats,
        )
        .await;
    });
    // Idle timeout: spawn a watchdog that signals shutdown via watch channel.
    // idle_timeout == 0 disables auto-shutdown (no watchdog spawned).
    let (shutdown_tx, mut shutdown_rx) = watch::channel(false);
    if cli.idle_timeout > 0 {
        let idle_secs = cli.idle_timeout;
        let activity = Arc::clone(&last_activity);
        tokio::spawn(idle_watchdog(activity, idle_secs, shutdown_tx));
    }
    // TLS serve path — takes priority when cert + key are both configured.
    #[cfg(feature = "tls")]
    if let (Some(cert), Some(key)) = (&cli.tls_cert, &cli.tls_key) {
        let tls_config = match axum_server::tls_rustls::RustlsConfig::from_pem_file(cert, key).await
        {
            Ok(c) => c,
            Err(e) => {
                error!(error = %e, cert = %cert.display(), key = %key.display(),
                    "TLS config failed — aborting");
                std::process::exit(1);
            }
        };
        // Bridge the watch-channel shutdown signal to axum_server's Handle,
        // allowing up to 30s for in-flight connections to drain.
        let handle = axum_server::Handle::new();
        let shutdown_handle = handle.clone();
        let mut tls_rx = shutdown_rx.clone();
        tokio::spawn(async move {
            let _ = tls_rx.wait_for(|&v| v).await;
            shutdown_handle.graceful_shutdown(Some(std::time::Duration::from_secs(30)));
        });
        let std_listener = listener.into_std().unwrap_or_else(|e| {
            error!(error = %e, "listener conversion failed");
            std::process::exit(1);
        });
        #[cfg(feature = "nats")]
        let tls_start = Instant::now();
        if let Err(e) = axum_server::from_tcp_rustls(std_listener, tls_config)
            .handle(handle)
            .serve(app.into_make_service_with_connect_info::<SocketAddr>())
            .await
        {
            error!(error = %e, "TLS server error");
        }
        #[cfg(feature = "nats")]
        if let Some(ref nats) = nats_publisher {
            let _ = nats.publish_stopped(tls_start.elapsed().as_secs()).await;
        }
        if let Some(ref pid_path) = cli.pid_file {
            let _ = std::fs::remove_file(pid_path);
        }
        return;
    }
    // Plain HTTP serve path.
    #[cfg(feature = "nats")]
    let startup_instant = Instant::now();
    let graceful = async move {
        let _ = shutdown_rx.wait_for(|&v| v).await;
    };
    if let Err(e) = axum::serve(
        listener,
        app.into_make_service_with_connect_info::<SocketAddr>(),
    )
    .with_graceful_shutdown(graceful)
    .await
    {
        error!(error = %e, "server error");
    }
    // Publish daemon.stopped event on graceful shutdown
    #[cfg(feature = "nats")]
    {
        if let Some(ref nats) = nats_publisher {
            let uptime_secs = startup_instant.elapsed().as_secs();
            if let Err(e) = nats.publish_stopped(uptime_secs).await {
                warn!(error = %e, "failed to publish daemon.stopped event");
            }
        }
    }
    // Cleanup PID file
    if let Some(ref pid_path) = cli.pid_file {
        let _ = std::fs::remove_file(pid_path);
    }
}
/// Watchdog that signals shutdown once the daemon has been idle for at
/// least `idle_secs` seconds.
///
/// `activity` holds the UNIX timestamp (seconds) of the last observed
/// activity; request handlers are expected to refresh it. Sends `true` on
/// `shutdown` exactly once and returns when the timeout is reached.
async fn idle_watchdog(activity: Arc<AtomicU64>, idle_secs: u64, shutdown: watch::Sender<bool>) {
    // Poll no less often than the timeout itself: a fixed 30s sweep would
    // delay (or effectively ignore) an idle timeout shorter than 30s.
    let check_interval = std::time::Duration::from_secs(idle_secs.clamp(1, 30));
    loop {
        tokio::time::sleep(check_interval).await;
        let last = activity.load(Ordering::Relaxed);
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        let idle = now.saturating_sub(last);
        if idle >= idle_secs {
            info!(idle, idle_secs, "idle timeout reached — shutting down");
            let _ = shutdown.send(true);
            return;
        }
    }
}
/// Periodic sweep of stale actor sessions.
/// Local actors: checked via `kill -0 <pid>`. Remote actors: `last_seen`
/// timeout. Publishes `actor.deregistered` events via NATS for reaped sessions.
async fn actor_sweep_loop(
    actors: Arc<ActorRegistry>,
    interval_secs: u64,
    #[cfg(feature = "nats")] nats: Option<Arc<ontoref_daemon::nats::NatsPublisher>>,
) {
    let sweep_pause = std::time::Duration::from_secs(interval_secs);
    loop {
        tokio::time::sleep(sweep_pause).await;
        let stale_tokens = actors.sweep_stale();
        #[cfg(feature = "nats")]
        publish_reaped_actors(&nats, &stale_tokens).await;
        // Without NATS the reaped list has no consumer; discard it explicitly.
        #[cfg(not(feature = "nats"))]
        let _ = stale_tokens;
    }
}
#[cfg(feature = "nats")]
/// Emit an `actor.deregistered` event for every reaped session token.
/// A publish failure is logged and does not stop the remaining tokens.
async fn publish_reaped_actors(
    nats: &Option<Arc<ontoref_daemon::nats::NatsPublisher>>,
    reaped: &[String],
) {
    let publisher = match nats.as_ref() {
        Some(p) => p,
        None => return,
    };
    for session_token in reaped {
        let outcome = publisher
            .publish_actor_deregistered(session_token, "stale_sweep")
            .await;
        if let Err(e) = outcome {
            warn!(error = %e, token = %session_token, "failed to publish actor.deregistered");
        }
    }
}
#[cfg(feature = "db")]
/// Connect to SurrealDB using the CLI configuration, returning `None`
/// (daemon runs cache-only) when no URL is configured or when the connect,
/// health check, or table initialization fails or times out.
async fn connect_db(cli: &Cli) -> Option<Arc<stratum_db::StratumDb>> {
    let db_url = cli.db_url.as_ref()?;
    let namespace = cli.db_namespace.as_deref().unwrap_or("ontoref");

    // Bound the initial connection attempt so an unreachable endpoint
    // cannot stall daemon startup.
    let connect_attempt = tokio::time::timeout(
        std::time::Duration::from_secs(5),
        stratum_db::StratumDb::connect_remote(
            db_url,
            namespace,
            "daemon",
            &cli.db_username,
            &cli.db_password,
        ),
    )
    .await;
    let db = match connect_attempt {
        Ok(Ok(db)) => db,
        Ok(Err(e)) => {
            error!(error = %e, "SurrealDB connection failed — running without persistence");
            return None;
        }
        Err(_) => {
            error!(url = %db_url, "SurrealDB connection timed out (5s) — running without persistence");
            return None;
        }
    };

    // A connected-but-unhealthy database is treated the same as no database.
    let health = tokio::time::timeout(std::time::Duration::from_secs(5), db.health_check()).await;
    match health {
        Ok(Ok(())) => {
            info!(url = %db_url, namespace = %namespace, "SurrealDB connected and healthy");
        }
        Ok(Err(e)) => {
            error!(error = %e, "SurrealDB health check failed — running without persistence");
            return None;
        }
        Err(_) => {
            error!("SurrealDB health check timed out — running without persistence");
            return None;
        }
    }

    if let Err(e) = db.initialize_tables().await {
        warn!(error = %e, "table initialization failed — proceeding with cache only");
        return None;
    }
    info!("Level 1 ontology tables initialized");
    Some(Arc::new(db))
}
#[cfg(feature = "ui")]
/// Resolve a UI asset directory: prefer `<project_root>/<config_dir>` when
/// it exists, otherwise try the XDG-style install location
/// `~/.local/share/ontoref-daemon/<basename>`. Falls back to the project
/// path (even if absent) when neither candidate exists.
fn resolve_asset_dir(project_root: &std::path::Path, config_dir: &str) -> std::path::PathBuf {
    let primary = project_root.join(config_dir);
    if primary.exists() {
        return primary;
    }
    let basename = std::path::Path::new(config_dir)
        .file_name()
        .unwrap_or_default();
    let xdg_candidate = std::env::var_os("HOME").map(|home| {
        std::path::PathBuf::from(home)
            .join(".local/share/ontoref-daemon")
            .join(basename)
    });
    match xdg_candidate {
        Some(share) if share.exists() => share,
        _ => primary,
    }
}
/// Apply the `ui` section of the stdin bootstrap config to CLI defaults.
///
/// CLI flags win: each field is filled in only when the corresponding
/// `cli` option is still `None`. Missing or empty config strings are
/// ignored. Asset directories go through `resolve_asset_dir`; TLS paths
/// are joined onto the project root directly.
#[cfg(feature = "ui")]
fn apply_ui_config(cli: &mut Cli, config: &serde_json::Value) {
    let Some(ui) = config.get("ui").and_then(|u| u.as_object()) else {
        return;
    };
    // Config value for `key`, only when present and non-empty.
    let non_empty_str = |key: &str| {
        ui.get(key)
            .and_then(|d| d.as_str())
            .filter(|s| !s.is_empty())
    };
    if cli.templates_dir.is_none() {
        if let Some(dir) = non_empty_str("templates_dir") {
            cli.templates_dir = Some(resolve_asset_dir(&cli.project_root, dir));
        }
    }
    if cli.public_dir.is_none() {
        if let Some(dir) = non_empty_str("public_dir") {
            cli.public_dir = Some(resolve_asset_dir(&cli.project_root, dir));
        }
    }
    #[cfg(feature = "tls")]
    {
        // TLS material is resolved relative to the project root, not via
        // resolve_asset_dir — certs are not installable assets.
        if cli.tls_cert.is_none() {
            if let Some(p) = non_empty_str("tls_cert") {
                cli.tls_cert = Some(cli.project_root.join(p));
            }
        }
        if cli.tls_key.is_none() {
            if let Some(p) = non_empty_str("tls_key") {
                cli.tls_key = Some(cli.project_root.join(p));
            }
        }
    }
}
fn write_pid_file(path: &PathBuf) -> std::io::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
std::fs::write(path, std::process::id().to_string())
}
/// Poll NATS JetStream for incoming events.
#[cfg(feature = "nats")]
async fn handle_nats_events(nats: Arc<ontoref_daemon::nats::NatsPublisher>) {
    loop {
        match nats.pull_events(10).await {
            Ok(batch) => {
                for (subject, payload) in batch {
                    dispatch_nats_event(&subject, &payload);
                }
                // Normal cadence between successful polls.
                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
            }
            Err(e) => {
                warn!("NATS poll error: {e}");
                // Back off a little longer after a failed poll.
                tokio::time::sleep(std::time::Duration::from_secs(2)).await;
            }
        }
    }
}
#[cfg(feature = "nats")]
/// Route a pulled JetStream event. Only `ecosystem.reflection.request`
/// is handled; every other subject is silently ignored.
fn dispatch_nats_event(subject: &str, payload: &serde_json::Value) {
    use ontoref_daemon::nats::NatsPublisher;
    if subject == "ecosystem.reflection.request" {
        if let Some((mode_id, _params)) = NatsPublisher::parse_reflection_request(payload) {
            info!(mode_id = %mode_id, "received reflection.request via JetStream");
        }
    }
}