ChatModel - Ollama

Ollama Model

An Ollama model implementation for Eino that implements the ToolCallingChatModel interface, enabling seamless integration with Eino's LLM capabilities for enhanced natural language processing and generation.

Features

  • Implements github.com/cloudwego/eino/components/model.Model
  • Easy integration with Eino's model system
  • Configurable model parameters
  • Support for chat completion
  • Support for streaming responses
  • Support for custom response parsing
  • Flexible model configuration

Installation

go get github.com/cloudwego/eino-ext/components/model/ollama@latest

Quick Start

Here is a quick example of how to use the Ollama model:

package main

import (
        "context"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {
        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL: "http://localhost:11434",
                Model:   modelName,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v\n", err)
                return
        }

        resp, err := chatModel.Generate(ctx, []*schema.Message{
                {
                        Role:    schema.User,
                        Content: "as a machine, how do you answer user's question?",
                },
        })
        if err != nil {
                log.Printf("Generate failed, err=%v\n", err)
                return
        }

        log.Printf("output: \n%v\n", resp)
}

Configuration

The model can be configured using the ollama.ChatModelConfig struct:

type ChatModelConfig struct {
    BaseURL string        `json:"base_url"`
    Timeout time.Duration `json:"timeout"` // request timeout for http client
    
    // HTTPClient specifies the client to send HTTP requests.
    // If HTTPClient is set, Timeout will not be used.
    // Optional. Default &http.Client{Timeout: Timeout}
    HTTPClient *http.Client `json:"http_client"`
    
    Model     string          `json:"model"`
    Format    json.RawMessage `json:"format"`
    KeepAlive *time.Duration  `json:"keep_alive"`
    
    Options *Options `json:"options"`
    
    Thinking *ThinkValue `json:"thinking"`
}


type Options struct {
    Runner
    
    // NumKeep specifies the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be trimmed.
    NumKeep int `json:"num_keep,omitempty"`
    // Seed sets the random number seed for the model. Using the same seed with the same parameters will produce the same output.
    Seed int `json:"seed,omitempty"`
    // NumPredict sets the maximum number of tokens to generate.
    NumPredict int `json:"num_predict,omitempty"`
    // TopK controls the diversity of the generated text by limiting the selection of tokens to the top k most likely tokens.
    TopK int `json:"top_k,omitempty"`
    // TopP, also known as nucleus sampling, is another way to control the diversity of the generated text. It filters out the least likely tokens whose cumulative probability is below a certain threshold.
    TopP float32 `json:"top_p,omitempty"`
    // MinP is a parameter that works with TopP to ensure that the generated text is not too constrained. It sets a minimum probability for a token to be considered.
    MinP float32 `json:"min_p,omitempty"`
    // TypicalP is a parameter that helps to generate more "typical" or expected text by sampling from a reduced set of tokens that are considered typical.
    TypicalP float32 `json:"typical_p,omitempty"`
    // RepeatLastN specifies how many of the last N tokens to consider for penalizing repetition.
    RepeatLastN int `json:"repeat_last_n,omitempty"`
    // Temperature controls the randomness of the generated text. A higher temperature results in more random and creative output, while a lower temperature produces more predictable and conservative text.
    Temperature float32 `json:"temperature,omitempty"`
    // RepeatPenalty is used to penalize the model for repeating tokens that have already appeared in the generated text.
    RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
    // PresencePenalty is used to penalize the model for introducing new tokens that were not present in the prompt.
    PresencePenalty float32 `json:"presence_penalty,omitempty"`
    // FrequencyPenalty is used to penalize the model for using tokens that appear frequently in the training data.
    FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
    // Stop is a list of strings that will cause the generation to stop if they are encountered.
    Stop []string `json:"stop,omitempty"`
}

type ThinkValue struct {
    // Value can be a bool or string
    Value interface{}
}
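
Putting these types together, the following is a minimal configuration sketch that reuses the ctx and imports from the Quick Start example above (plus "time"). It assumes the package-local ollama.Options and ollama.ThinkValue types listed above (some examples further down this page use api.Options from the Ollama client package instead), and every parameter value here is illustrative only:

cfg := &ollama.ChatModelConfig{
        BaseURL: "http://localhost:11434",
        Timeout: 30 * time.Second,
        Model:   "llama3", // any model already pulled into the local Ollama instance
        Options: &ollama.Options{
                Temperature: 0.7,
                TopP:        0.9,
                NumPredict:  256,
        },
        // Per the struct comment, Value may be a bool or a string; which string
        // levels are accepted depends on the model and the Ollama version.
        Thinking: &ollama.ThinkValue{Value: true},
}

chatModel, err := ollama.NewChatModel(ctx, cfg)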

Examples

Text Generation

package main

import (
        "context"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {
        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL: "http://localhost:11434",
                Model:   modelName,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v\n", err)
                return
        }

        resp, err := chatModel.Generate(ctx, []*schema.Message{
                {
                        Role:    schema.User,
                        Content: "as a machine, how do you answer user's question?",
                },
        })
        if err != nil {
                log.Printf("Generate failed, err=%v\n", err)
                return
        }

        log.Printf("output: \n%v\n", resp)
}

Multimodal Support (Image Understanding)

package main

import (
        "context"
        "fmt"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {
        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL: "http://localhost:11434",
                Model:   modelName,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v\n", err)
                return
        }

        multiModalMsg := &schema.Message{
                UserInputMultiContent: []schema.MessageInputPart{
                        {
                                Type: schema.ChatMessagePartTypeText,
                                Text: "this picture is a landscape photo, what's the picture's content",
                        },
                        {
                                Type: schema.ChatMessagePartTypeImageURL,
                                Image: &schema.MessageInputImage{
                                        MessagePartCommon: schema.MessagePartCommon{
                                                URL: of("https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT11qEDxU4X_MVKYQVU5qiAVFidA58f8GG0bQ&s"),
                                        },
                                        Detail: schema.ImageURLDetailAuto,
                                },
                        },
                },
        }

        resp, err := chatModel.Generate(ctx, []*schema.Message{
                multiModalMsg,
        })
        if err != nil {
                log.Fatalf("Generate failed, err=%v", err)
        }

        fmt.Printf("output: \n%v", resp)
}

func of[T any](a T) *T {
        return &a
}

Streaming Generation

package main

import (
        "context"
        "fmt"
        "io"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {
        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL: "http://localhost:11434",
                Model:   modelName,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v\n", err)
                return
        }

        streamMsgs, err := chatModel.Stream(ctx, []*schema.Message{
                {
                        Role:    schema.User,
                        Content: "as a machine, how do you answer user's question?",
                },
        })

        if err != nil {
                log.Printf("Generate failed, err=%v", err)
                return
        }

        defer streamMsgs.Close()

        log.Println("typewriter output:")
        for {
                msg, err := streamMsgs.Recv()
                if err == io.EOF {
                        break
                }
                if err != nil {
                        log.Printf("\nstream.Recv failed, err=%v", err)
                        return
                }
                fmt.Print(msg.Content)
        }

        fmt.Print("\n")
}
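
If the complete message is needed once streaming finishes, the chunks can be collected and merged instead of printed one by one. Below is a minimal sketch of an alternative receive loop that reuses the streamMsgs reader from the example above; it assumes eino's schema.ConcatMessages helper for merging streamed message chunks:

var chunks []*schema.Message
for {
        msg, err := streamMsgs.Recv()
        if err == io.EOF {
                break
        }
        if err != nil {
                log.Printf("stream.Recv failed, err=%v", err)
                return
        }
        chunks = append(chunks, msg)
}

// Merge the streamed chunks back into one message.
full, err := schema.ConcatMessages(chunks)
if err != nil {
        log.Printf("ConcatMessages failed, err=%v", err)
        return
}
log.Printf("full output: \n%v\n", full.Content)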

Tool Calling

package main

import (
        "context"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {

        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL: "http://localhost:11434",
                Model:   modelName,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v", err)
                return
        }

        err = chatModel.BindTools([]*schema.ToolInfo{
                {
                        Name: "user_company",
                        Desc: "Query the user's company and position information based on their name and email",
                        ParamsOneOf: schema.NewParamsOneOfByParams(
                                map[string]*schema.ParameterInfo{
                                        "name": {
                                                Type: "string",
                                                Desc: "The user's name",
                                        },
                                        "email": {
                                                Type: "string",
                                                Desc: "The user's email",
                                        },
                                }),
                },
                {
                        Name: "user_salary",
                        Desc: "Query the user's salary information based on their name and email",
                        ParamsOneOf: schema.NewParamsOneOfByParams(
                                map[string]*schema.ParameterInfo{
                                        "name": {
                                                Type: "string",
                                                Desc: "The user's name",
                                        },
                                        "email": {
                                                Type: "string",
                                                Desc: "The user's email",
                                        },
                                }),
                },
        })
        if err != nil {
                log.Printf("BindForcedTools failed, err=%v", err)
                return
        }

        resp, err := chatModel.Generate(ctx, []*schema.Message{
                {
                        Role:    schema.System,
                        Content: "You are a real estate agent. Use the user_company and user_salary APIs to provide relevant property information based on the user's salary and job. Email is required",
                },
                {
                        Role:    schema.User,
                        Content: "My name is zhangsan, and my email is zhangsan@bytedance.com. Please recommend some suitable houses for me.",
                },
        })

        if err != nil {
                log.Printf("Generate failed, err=%v", err)
                return
        }

        log.Printf("output: \n%+v", resp)
}
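
With tools bound, the returned message usually carries tool calls rather than plain text. The loop below is a minimal sketch of inspecting them on the resp value from the example above; it assumes the ToolCalls field of schema.Message, and the actual arguments produced depend on the model:

for _, tc := range resp.ToolCalls {
        // Each tool call carries the tool name and a JSON string of arguments
        // that the caller is expected to execute.
        log.Printf("tool call: name=%s, arguments=%s", tc.Function.Name, tc.Function.Arguments)
}

A typical next step is to execute the requested tool locally, append its result to the conversation (for example via schema.ToolMessage), and call Generate again with the extended message list.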

Enabling Thinking Mode

package main

import (
        "context"
        "log"
        "os"

        "github.com/cloudwego/eino/schema"
        ollamaapi "github.com/eino-contrib/ollama/api"

        "github.com/cloudwego/eino-ext/components/model/ollama"
)

func main() {
        ctx := context.Background()
        modelName := os.Getenv("MODEL_NAME")
        thinking := ollamaapi.ThinkValue{Value: true}
        chatModel, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
                BaseURL:  "http://localhost:11434",
                Model:    modelName,
                Thinking: &thinking,
        })
        if err != nil {
                log.Printf("NewChatModel failed, err=%v\n", err)
                return
        }

        resp, err := chatModel.Generate(ctx, []*schema.Message{
                {
                        Role:    schema.User,
                        Content: "as a machine, how do you answer user's question?",
                },
        })
        if err != nil {
                log.Printf("Generate failed, err=%v\n", err)
                return
        }

        log.Printf("output thinking: \n%v\n", resp.ReasoningContent)
        log.Printf("output content: \n%v\n", resp.Content)
}

Introduction

The Ollama model is an implementation of the ChatModel interface, used to interact with a local Ollama large language model service. Ollama is an open-source framework for running large language models locally; it supports a variety of open-source models (such as Llama and Mistral) and provides a simple API along with comprehensive performance monitoring. This component implements the interface described in Eino: ChatModel Usage Guide.

Usage

Component Initialization

The Ollama model is initialized via the NewChatModel function. The main configuration parameters are as follows:

import (
    "encoding/json"
    "time"

    "github.com/cloudwego/eino-ext/components/model/ollama"
    "github.com/ollama/ollama/api"
)

keepAlive := 5 * time.Minute

model, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
    // Basic configuration
    BaseURL: "http://localhost:11434", // Ollama service address
    Timeout: 30 * time.Second,         // request timeout

    // Model configuration
    Model:     "llama2",                  // model name
    Format:    json.RawMessage(`"json"`), // output format (optional)
    KeepAlive: &keepAlive,                // keep-alive duration

    // Model parameters
    Options: &api.Options{
       Runner: api.Runner{
          NumCtx:    4096, // context window size
          NumGPU:    1,    // number of GPUs
          NumThread: 4,    // number of CPU threads
       },
       Temperature:   0.7,        // temperature
       TopP:          0.9,        // top-p sampling
       TopK:          40,         // top-k sampling
       Seed:          42,         // random seed
       NumPredict:    100,        // maximum number of tokens to generate
       Stop:          []string{}, // stop sequences
       RepeatPenalty: 1.1,        // repetition penalty
    },
})

Generating Conversations

Conversation generation supports both standard and streaming modes:

// Standard mode
response, err := model.Generate(ctx, messages)

// Streaming mode
stream, err := model.Stream(ctx, messages)

Example message format:

import "github.com/cloudwego/eino/schema"

messages := []*schema.Message{
    // System message
    schema.SystemMessage("You are an assistant"),

    // User message
    schema.UserMessage("Hello"),
}

Tool Calling

Tool binding is supported:

Note that only models with function calling support can use this capability.

import "github.com/cloudwego/eino/schema"

// Define tools
tools := []*schema.ToolInfo{
    {
       Name: "search",
       Desc: "Search for information",
       ParamsOneOf: schema.NewParamsOneOfByParams(map[string]*schema.ParameterInfo{
          "query": {
             Type:     schema.String,
             Desc:     "The search keyword",
             Required: true,
          },
       }),
    },
}

// Bind the tools
err := model.BindTools(tools)
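
Since the component also implements the ToolCallingChatModel interface (as noted at the top of this page), a non-mutating alternative is to derive a new, tools-bound model rather than binding tools on the existing one. A short sketch, assuming the interface's WithTools method and reusing the ctx and messages from the snippets above:

// WithTools returns a new model instance with the tools attached,
// leaving the original model unchanged.
toolModel, err := model.WithTools(tools)
if err != nil {
    panic(err)
}

response, err := toolModel.Generate(ctx, messages)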

Complete Usage Examples

Basic Chat

package main

import (
    "context"
    "time"

    "github.com/cloudwego/eino-ext/components/model/ollama"
    "github.com/cloudwego/eino/schema"
    "github.com/ollama/ollama/api"
)

func main() {
    ctx := context.Background()

    // Initialize the model
    model, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
       BaseURL: "http://localhost:11434",
       Timeout: 30 * time.Second,
       Model:   "llama2",
       Options: &api.Options{
          Temperature: 0.7,
          NumPredict:  100,
       },
    })
    if err != nil {
       panic(err)
    }

    // Prepare messages
    messages := []*schema.Message{
       schema.SystemMessage("You are an assistant"),
       schema.UserMessage("Introduce Ollama"),
    }

    // Generate a response
    response, err := model.Generate(ctx, messages)
    if err != nil {
       panic(err)
    }

    // Handle the response
    println(response.Content)
}

Streaming Chat

package main

import (
    "context"
    "time"
    
    "github.com/cloudwego/eino-ext/components/model/ollama"
    "github.com/cloudwego/eino/schema"
)

func main() {
    ctx := context.Background()
    
    // Initialize the model
    model, err := ollama.NewChatModel(ctx, &ollama.ChatModelConfig{
        BaseURL:  "http://localhost:11434",
        Timeout:  30 * time.Second,
        Model:    "llama2",
    })
    if err != nil {
        panic(err)
    }
    
    // Prepare messages
    messages := []*schema.Message{
        schema.SystemMessage("You are an assistant"),
        schema.UserMessage("Tell me a joke"),
    }
    
    // Get the streaming response
    stream, err := model.Stream(ctx, messages)
    if err != nil {
        panic(err)
    }
    defer stream.Close() // remember to close the reader
    
    // Process the streamed content
    for {
        chunk, err := stream.Recv()
        if err != nil {
            break
        }
        print(chunk.Content)
    }
}

More Examples

Related Documentation

