
use async_trait::async_trait;
use crate::error::LlmError;
use crate::providers::{
    GenerationOptions, GenerationResponse, LlmProvider, Message, StreamResponse,
};

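/// Provider backed by a locally running Ollama server.
///
/// Requests go to Ollama's HTTP API (`/api/tags` for availability checks,
/// `/api/generate` for completions) at the configured base URL, which
/// defaults to `http://localhost:11434`.
///
/// A minimal usage sketch (assuming the crate's `Message` and
/// `GenerationOptions` values are constructed elsewhere):
///
/// ```ignore
/// let provider = OllamaProvider::from_env("llama3");
/// if provider.is_available().await {
///     let response = provider.generate(&messages, &options).await?;
///     println!("{}", response.content);
/// }
/// ```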
#[cfg(feature = "ollama")]
pub struct OllamaProvider {
    /// HTTP client reused across requests.
    client: reqwest::Client,
    /// Base URL of the Ollama server, e.g. `http://localhost:11434`.
    base_url: String,
    /// Name of the model to run, e.g. `llama3`.
    model: String,
}

#[cfg(feature = "ollama")]
impl OllamaProvider {
    /// Create a provider for an explicit base URL and model name.
    pub fn new(base_url: impl Into<String>, model: impl Into<String>) -> Self {
        Self {
            client: reqwest::Client::new(),
            base_url: base_url.into(),
            model: model.into(),
        }
    }

    /// Create a provider from the `OLLAMA_HOST` environment variable,
    /// falling back to `http://localhost:11434` when it is unset.
    pub fn from_env(model: impl Into<String>) -> Self {
        let base_url =
            std::env::var("OLLAMA_HOST").unwrap_or_else(|_| "http://localhost:11434".to_string());
        Self::new(base_url, model)
    }

    /// Convenience constructor for the `llama3` model.
    pub fn llama3() -> Self {
        Self::from_env("llama3")
    }

    /// Convenience constructor for the `codellama` model.
    pub fn codellama() -> Self {
        Self::from_env("codellama")
    }

    /// Convenience constructor for the `mistral` model.
    pub fn mistral() -> Self {
        Self::from_env("mistral")
    }
}

#[cfg(feature = "ollama")]
impl Default for OllamaProvider {
    fn default() -> Self {
        Self::llama3()
    }
}

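// `LlmProvider` implementation. Ollama runs models locally, so the cost
// accounting below always reports zero; streaming is not wired up yet.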
#[cfg(feature = "ollama")]
#[async_trait]
impl LlmProvider for OllamaProvider {
    fn name(&self) -> &str {
        "ollama"
    }

    fn model(&self) -> &str {
        &self.model
    }

    async fn is_available(&self) -> bool {
        // Any successful response from `/api/tags` means the server is reachable.
        self.client
            .get(format!("{}/api/tags", self.base_url))
            .send()
            .await
            .is_ok()
    }
    async fn generate(
        &self,
        messages: &[Message],
        options: &GenerationOptions,
    ) -> Result<GenerationResponse, LlmError> {
        let start = std::time::Instant::now();

        let prompt = messages
            .iter()
            .map(|m| match m.role {
                crate::providers::Role::System => format!("System: {}", m.content),
                crate::providers::Role::User => format!("User: {}", m.content),
                crate::providers::Role::Assistant => format!("Assistant: {}", m.content),
            })
            .collect::<Vec<_>>()
            .join("\n\n");

        // Ollama expects sampling parameters under a nested "options" object,
        // not at the top level of the request body.
        let mut body = serde_json::json!({
            "model": self.model,
            "prompt": prompt,
            "stream": false,
            "options": {},
        });
        if let Some(temp) = options.temperature {
            body["options"]["temperature"] = serde_json::json!(temp);
        }
        if let Some(top_p) = options.top_p {
            body["options"]["top_p"] = serde_json::json!(top_p);
        }
        if !options.stop_sequences.is_empty() {
            body["options"]["stop"] = serde_json::json!(options.stop_sequences);
        }

        let response = self
            .client
            .post(format!("{}/api/generate", self.base_url))
            .json(&body)
            .send()
            .await
            .map_err(|e| LlmError::Network(e.to_string()))?;

        if !response.status().is_success() {
            let status = response.status();
            let text = response.text().await.unwrap_or_default();
            return Err(LlmError::Api(format!("{}: {}", status, text)));
        }

        let json: serde_json::Value = response
            .json()
            .await
            .map_err(|e| LlmError::Parse(e.to_string()))?;
        let content = json["response"].as_str().unwrap_or("").to_string();

        // Prefer the token counts reported by Ollama; fall back to a rough
        // ~4-characters-per-token estimate when they are missing.
        let input_tokens = json["prompt_eval_count"]
            .as_u64()
            .map(|n| n as u32)
            .unwrap_or(prompt.len() as u32 / 4);
        let output_tokens = json["eval_count"]
            .as_u64()
            .map(|n| n as u32)
            .unwrap_or(content.len() as u32 / 4);

        Ok(GenerationResponse {
            content,
            model: self.model.clone(),
            provider: "ollama".to_string(),
            input_tokens,
            output_tokens,
            cost_cents: 0.0,
            latency_ms: start.elapsed().as_millis() as u64,
        })
    }

    async fn stream(
        &self,
        _messages: &[Message],
        _options: &GenerationOptions,
    ) -> Result<StreamResponse, LlmError> {
        // Ollama supports streaming responses, but this provider does not
        // implement them yet.
        Err(LlmError::Unavailable(
            "Streaming not yet implemented".to_string(),
        ))
    }

    fn estimate_cost(&self, _input_tokens: u32, _output_tokens: u32) -> f64 {
        0.0
    }

    fn cost_per_1m_input(&self) -> f64 {
        0.0
    }

    fn cost_per_1m_output(&self) -> f64 {
        0.0
    }
}
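
// Smoke tests for the constructors and metadata; they exercise only local
// state and do not require a running Ollama server.
#[cfg(all(test, feature = "ollama"))]
mod tests {
    use super::*;

    #[test]
    fn new_sets_base_url_and_model() {
        let provider = OllamaProvider::new("http://localhost:11434", "llama3");
        assert_eq!(provider.name(), "ollama");
        assert_eq!(provider.model(), "llama3");
    }

    #[test]
    fn local_models_cost_nothing() {
        let provider = OllamaProvider::mistral();
        assert_eq!(provider.estimate_cost(1_000, 1_000), 0.0);
        assert_eq!(provider.cost_per_1m_input(), 0.0);
        assert_eq!(provider.cost_per_1m_output(), 0.0);
    }
}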