humanus.cpp/include/llm.h

#ifndef HUMANUS_LLM_H
#define HUMANUS_LLM_H
#include "config.h"
#include "logger.h"
#include "schema.h"
#include "httplib.h"
#include <map>
#include <unordered_map>
#include <string>
#include <memory>
#include <vector>
#include <functional>
#include <stdexcept>
#include <future>

namespace humanus {

class LLM {
private:
    static std::unordered_map<std::string, std::shared_ptr<LLM>> instances_;

    std::unique_ptr<httplib::Client> client_;

    std::shared_ptr<LLMConfig> llm_config_;

    std::shared_ptr<ToolParser> tool_parser_;

    size_t total_prompt_tokens_;
    size_t total_completion_tokens_;
public:
    // Constructor
    LLM(const std::string& config_name, const std::shared_ptr<LLMConfig>& config = nullptr, const std::shared_ptr<ToolParser>& tool_parser = nullptr) : llm_config_(config), tool_parser_(tool_parser) {
        if (!llm_config_) {
            // Resolve the LLM config from the global Config when none is supplied
            // (mirrors the fallback in get_instance, avoids dereferencing a null config below)
            if (Config::get_instance().llm().find(config_name) == Config::get_instance().llm().end()) {
                logger->warn("LLM config not found: " + config_name + ", falling back to default LLM config.");
                llm_config_ = std::make_shared<LLMConfig>(Config::get_instance().llm().at("default"));
            } else {
                llm_config_ = std::make_shared<LLMConfig>(Config::get_instance().llm().at(config_name));
            }
        }

        if (!llm_config_->oai_tool_support && !tool_parser_) {
            if (Config::get_instance().tool_parser().find(config_name) == Config::get_instance().tool_parser().end()) {
                logger->warn("Tool helper config not found: " + config_name + ", falling back to default tool helper config.");
                tool_parser_ = std::make_shared<ToolParser>(Config::get_instance().tool_parser().at("default"));
            } else {
                tool_parser_ = std::make_shared<ToolParser>(Config::get_instance().tool_parser().at(config_name));
            }
        }

        client_ = std::make_unique<httplib::Client>(llm_config_->base_url);
        client_->set_default_headers({
            {"Authorization", "Bearer " + llm_config_->api_key}
        });
        client_->set_read_timeout(llm_config_->timeout);

        total_prompt_tokens_ = 0;
        total_completion_tokens_ = 0;
    }

    // Get the singleton instance
    static std::shared_ptr<LLM> get_instance(const std::string& config_name = "default", const std::shared_ptr<LLMConfig>& llm_config = nullptr) {
        if (instances_.find(config_name) == instances_.end()) {
            auto llm_config_ = llm_config;
            if (!llm_config_) {
                if (Config::get_instance().llm().find(config_name) == Config::get_instance().llm().end()) {
                    logger->warn("LLM config not found: " + config_name + ", falling back to default LLM config.");
                    llm_config_ = std::make_shared<LLMConfig>(Config::get_instance().llm().at("default"));
                } else {
                    llm_config_ = std::make_shared<LLMConfig>(Config::get_instance().llm().at(config_name));
                }
            }
            instances_[config_name] = std::make_shared<LLM>(config_name, llm_config_);
        }
        return instances_[config_name];
    }
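
    // Usage sketch (illustrative only): instances are cached per config name, so
    // repeated calls with the same name return the same shared object. The "coder"
    // name below is a hypothetical config entry, not part of this API.
    //
    //   auto llm = LLM::get_instance();          // "default" config
    //   auto coder = LLM::get_instance("coder"); // falls back to default if missing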

    bool enable_vision() const {
        return llm_config_->enable_vision;
    }

    std::string vision_details() const {
        return llm_config_->vision_details;
    }

    /**
     * @brief Format the message list into the format the LLM accepts
     * @param messages The list of Message objects
     * @return The formatted message list
     * @throws std::invalid_argument If the message format is invalid or missing necessary fields
     * @throws std::runtime_error If the message type is not supported
     */
    json format_messages(const std::vector<Message>& messages);
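
    // Example (sketch): the concrete output layout is defined in the implementation
    // file, but given the chat-style HTTP API used by this class it is presumably an
    // OpenAI-style message array. Message::user_message() is assumed to be a factory
    // provided by schema.h; adjust to the actual Message API.
    //
    //   std::vector<Message> messages = { Message::user_message("Hello!") };
    //   json formatted = LLM::get_instance()->format_messages(messages);
    //   // e.g. [{"role": "user", "content": "Hello!"}]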

    /**
     * @brief Send a request to the LLM and get the reply
     * @param messages The conversation message list
     * @param system_prompt Optional system message
     * @param next_step_prompt Optional prompt message for the next step
     * @param max_retries The maximum number of retries
     * @return The generated assistant content
     * @throws std::invalid_argument If the message is invalid or the reply is empty
     * @throws std::runtime_error If the API call fails
     */
    std::string ask(
        const std::vector<Message>& messages,
        const std::string& system_prompt = "",
        const std::string& next_step_prompt = "",
        int max_retries = 3
    );
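
    // Usage sketch (assumes a message list built with the schema.h Message helpers):
    //
    //   auto llm = LLM::get_instance();
    //   std::string reply = llm->ask(messages, "You are a helpful assistant.");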

    /**
     * @brief Send a request to the LLM with tool functions
     * @param messages The conversation message list
     * @param system_prompt Optional system message
     * @param next_step_prompt Optional prompt message for the next step
     * @param tools The tool list
     * @param tool_choice The tool choice strategy
     * @param max_retries The maximum number of retries
     * @return The generated assistant message (content, tool_calls)
     * @throws std::invalid_argument If the tool, tool choice or message is invalid
     * @throws std::runtime_error If the API call fails
     */
    json ask_tool(
        const std::vector<Message>& messages,
        const std::string& system_prompt = "",
        const std::string& next_step_prompt = "",
        const json& tools = {},
        const std::string& tool_choice = "auto",
        int max_retries = 3
    );
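
    // Usage sketch: the tools array is passed through to the chat completions request,
    // so the OpenAI function-calling schema below is an assumption about the backend,
    // and get_weather is a hypothetical tool. json is assumed to be nlohmann::json.
    //
    //   json tools = json::parse(R"([{
    //     "type": "function",
    //     "function": {
    //       "name": "get_weather",
    //       "description": "Get the weather for a city",
    //       "parameters": {
    //         "type": "object",
    //         "properties": {"city": {"type": "string"}},
    //         "required": ["city"]
    //       }
    //     }
    //   }])");
    //   json result = llm->ask_tool(messages, "", "", tools);
    //   // result["content"] and result["tool_calls"] as documented above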

    size_t total_prompt_tokens() const {
        return total_prompt_tokens_;
    }

    size_t total_completion_tokens() const {
        return total_completion_tokens_;
    }
};
} // namespace humanus
#endif // HUMANUS_LLM_H