update README; fix bug for deepseek-chat (could not handle array content)

main
hkr04 2025-04-13 00:02:18 +08:00
parent d7f0f63149
commit f6cc8995fb
23 changed files with 295 additions and 107 deletions

4
.gitignore vendored
View File

@ -81,3 +81,7 @@ examples/server/webui/dist
__pycache__/
*/poetry.lock
poetry.toml
# Configs
config/config*.toml

View File

@ -21,29 +21,39 @@ cmake --build build --config Release
## How to Run
### Configuration
Switch to your own configuration first:
1. Replace `base_url`, `api_key`, etc. in `config/config_llm.toml` according to your needs.
2. Fill in `args` after `"@modelcontextprotocol/server-filesystem"` for `filesystem` to control the access to files. For example:
1. Copy configuration files from `config/example` to `config`.
2. Replace `base_url`, `api_key`, etc. in `config/config_llm.toml` and the other settings in `config/config*.toml` according to your needs.
> Note: `llama-server` in [llama.cpp](https://github.com/ggml-org/llama.cpp) also supports embedding models.
3. Fill in the `args` after `"@modelcontextprotocol/server-filesystem"` for `filesystem` to control access to files. For example:
```
[filesystem]
type = "stdio"
command = "npx"
args = ["-y",
"@modelcontextprotocol/server-filesystem",
"/Users/{username}/Desktop",
"/Users/{Username}/Desktop",
"other/path/to/your/files]
```
Start an MCP server with the tool `python_execute` on port 8818:
### `mcp_server`
(for tools; currently only `python_execute` is provided as an example)
Start an MCP server with the tool `python_execute` on port 8895 (or pass a different port as an argument):
```bash
./build/bin/mcp_server # Unix/MacOS
./build/bin/mcp_server <port> # Unix/MacOS
```
```shell
.\build\bin\Release\mcp_server.exe # Windows
.\build\bin\Release\mcp_server.exe <port> # Windows
```
Run agent `humanus` with tools `python_execute`, `filesystem` and `playwright` (for browser use):
### `humanus_cli`
Run with tools `python_execute`, `filesystem` and `playwright` (for browser use):
```bash
./build/bin/humanus_cli # Unix/MacOS
@ -53,7 +63,9 @@ Run agent `humanus` with tools `python_execute`, `filesystem` and `playwright` (
.\build\bin\Release\humanus_cli.exe # Windows
```
Run experimental planning flow (only agent `humanus` as executor):
### `humanus_cli_plan` (WIP)
Run the planning flow (only the agent `humanus` is used as executor):
```bash
./build/bin/humanus_cli_plan # Unix/MacOS
```
@ -62,6 +74,43 @@ Run experimental planning flow (only agent `humanus` as executor):
.\build\bin\Release\humanus_cli_plan.exe # Windows
```
### `humanus_server` (WIP)
Run agents in an MCP server (listening on port 8896 by default):
- `humanus_initialze`: Pass a JSON configuration (like the one in `config/config.toml`) to initialize an agent for the session. (Only one agent is maintained per session/client)
- `humanus_run`: Pass a `prompt` to tell the agent what to do. (Only one task can run at a time)
- `humanus_terminate`: Stop the current task.
- `humanus_status`: Get the current state and other information about the agent and the task. Returns (see the example response after this list):
- `state`: Agent state.
- `current_step`: Current step index of the agent.
- `max_steps`: Maximum steps executing without interaction with the user.
- `prompt_tokens`: Prompt (input) token consumption.
- `completion_tokens`: Completion (output) token consumption.
- `log_buffer`: Logs in the buffer, similar to the `humanus_cli` output. Cleared after being fetched.
- `result`: A summary of what the agent did. Not empty once the task is finished.
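For reference, a `humanus_status` result might look like the following. This is a hypothetical sketch: the field names come from the list above, but the state name, the shape of `log_buffer`, and all values are illustrative only.
```json
{
  "state": "IDLE",
  "current_step": 3,
  "max_steps": 30,
  "prompt_tokens": 5210,
  "completion_tokens": 684,
  "log_buffer": [
    "Processing your request: ..."
  ],
  "result": "Saved the requested summary to summary.md on the Desktop."
}
```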
```bash
./build/bin/humanus_server <port> # Unix/MacOS
```
```shell
.\build\bin\Release\humanus_server.exe <port> # Windows
```
Configure it in Cursor:
```json
{
"mcpServers": {
"humanus": {
"url": "http://localhost:8896/sse"
}
}
}
```
> What if we added `humanus` itself to `mcp_servers`? It might be interesting.
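A hypothetical sketch of such an entry in `config/config_mcp.toml`, assuming a `humanus_server` instance is listening on its default port 8896 (the key name `humanus` and the values below are illustrative only):
```
[humanus]
type = "sse"
host = "localhost"
port = 8896
sse_endpoint = "/sse"
```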
## Acknowledgement
<p align="center">
@ -72,7 +121,7 @@ Run experimental planning flow (only agent `humanus` as executor):
## Cite
```
@misc{humanuscpp,
@misc{humanus_cpp,
author = {Zihong Zhang and Zuchao Li},
title = {humanus.cpp: A Lightweight C++ Framework for Local LLM Agents},
year = {2025}

View File

@ -1,5 +1,13 @@
[humanus_cli]
llm = "qwen-max-latest"
llm = "qwen-max-latest" # Key in config_llm.toml
memory = "long-context" # Key in config_mem.toml
tools = ["filesystem", "playwright", "image_loader"] # Built-in tools configuration
mcp_servers = ["python_execute"] # Keys in config_mcp.toml; all MCP tools provided by these servers will be added
max_steps = 30 # Maximum number of automatic steps without user confirmation
duplicate_threshold = 2 # Used to detect a repeating condition (checked via LCS)
[humanus_plan]
llm = "deepseek-chat"
memory = "long-context"
tools = ["filesystem", "playwright", "image_loader"]
mcp_servers = ["python_execute"]

View File

@ -1,11 +1,11 @@
["nomic-embed-text-v1.5"]
provider = "oai"
base_url = "http://localhost:8080"
endpoint = "/v1/embeddings"
model = "nomic-embed-text-v1.5.f16.gguf"
api_key = ""
embeddings_dim = 768
max_retries = 3
provider = "oai" # Only support OAI-Compatible style for now
base_url = "http://localhost:8080" # Base url. Note: Don't add any endpoint behind
endpoint = "/v1/embeddings" # Endpoint of embeddings
model = "nomic-embed-text-v1.5.f16.gguf" # Model name
api_key = "" # Your API Key
embeddings_dim = 768 # Dimension of embeddings (refer to API docs)
max_retries = 3 # Maximum retry count
[qwen-text-embedding-v3]
provider = "oai"

View File

@ -1,3 +1,9 @@
[qwen-max]
model = "qwen-max" # Model name
base_url = "https://dashscope.aliyuncs.com" # Base url. Note: Don't add any endpoint behind
endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions
api_key = "sk-cb1bb2a240d84182bb93f6dd0fe03600" # Your API Key
[qwen-max-latest]
model = "qwen-max-latest"
base_url = "https://dashscope.aliyuncs.com"
@ -9,7 +15,7 @@ model = "qwen-vl-max-latest"
base_url = "https://dashscope.aliyuncs.com"
endpoint = "/compatible-mode/v1/chat/completions"
api_key = "sk-cb1bb2a240d84182bb93f6dd0fe03600"
enable_vision = true
enable_vision = true # The model accepts multimodal content items like {"type": "image_url", "image_url": {"url": "..."}}
["claude-3.5-sonnet"]
model = "anthropic/claude-3.5-sonnet"

View File

@ -3,6 +3,7 @@ type = "sse"
host = "localhost"
port = 8895
sse_endpoint = "/sse"
message_enpoint = "/message"
[puppeteer]
type = "stdio"
@ -19,4 +20,4 @@ type = "stdio"
command = "npx"
args = ["-y",
"@modelcontextprotocol/server-filesystem",
"/Users/hyde/Desktop"]
"/Users/hyde/Desktop"] # Allowed paths

View File

@ -1,12 +1,12 @@
[default]
max_messages = 16
max_tokens_message = 32768
max_tokens_messages = 65536
max_tokens_context = 131072
retrieval_limit = 32
embedding_model = "qwen-text-embedding-v3"
vector_store = "hnswlib"
llm = "qwen-max-latest"
max_messages = 16 # Maximum number of messages in short-term memory
max_tokens_message = 32768 # Maximum number of tokens in single message
max_tokens_messages = 65536 # Maximum number of tokens in short-term memory
max_tokens_context = 131072 # Maximum number of tokens in context (used by `get_messages`)
retrieval_limit = 32 # Maximum number of results to retrieve from long-term memory
embedding_model = "qwen-text-embedding-v3" # Key in config_embd.toml
vector_store = "hnswlib" # Key in config_vec.toml
llm = "qwen-max-latest" # Key in config_llm.toml
[long-context]
max_messages = 32

View File

@ -0,0 +1,15 @@
[humanus_cli]
llm = "qwen-max-latest" # Key in config_llm.toml
memory = "default" # Key in config_mem.toml
tools = ["filesystem", "playwright", "image_loader"] # Built-in tools configuration
mcp_servers = ["python_execute"] # Keys in config_mcp.toml; all MCP tools provided by these servers will be added
max_steps = 30 # Maximum number of automatic steps without user confirmation
duplicate_threshold = 2 # Used to detect a repeating condition (checked via LCS)
[humanus_plan]
llm = "deepseek-chat"
memory = "long-context"
tools = ["filesystem", "playwright", "image_loader"]
mcp_servers = ["python_execute"]
max_steps = 30
duplicate_threshold = 2

View File

@ -0,0 +1,17 @@
["nomic-embed-text-v1.5"]
provider = "oai" # Only support OAI-Compatible style for now
base_url = "http://localhost:8080" # Base url. Note: Don't add any endpoint behind
endpoint = "/v1/embeddings" # Endpoint of embeddings
model = "nomic-embed-text-v1.5.f16.gguf" # Model name
api_key = "" # Your API Key
embeddings_dim = 768 # Dimension of embeddings (refer to API docs)
max_retries = 3 # Maximum retry count
[qwen-text-embedding-v3]
provider = "oai"
base_url = "https://dashscope.aliyuncs.com"
endpoint = "/compatible-mode/v1/embeddings"
model = "text-embedding-v3"
api_key = "sk-"
embeddings_dim = 1024
max_retries = 3

View File

@ -0,0 +1,45 @@
[qwen-max]
model = "qwen-max" # Model name
base_url = "https://dashscope.aliyuncs.com" # Base url. Note: Don't add any endpoint behind
endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions
api_key = "sk-" # Your API Key
[qwen-max-latest]
model = "qwen-max-latest" # Model name
base_url = "https://dashscope.aliyuncs.com" # Base url. Note: Don't add any endpoint behind
endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions
api_key = "sk-" # Your API Key
[qwen-vl-max-latest]
model = "qwen-vl-max-latest"
base_url = "https://dashscope.aliyuncs.com"
endpoint = "/compatible-mode/v1/chat/completions"
api_key = "sk-"
enable_vision = true # The model accepts multimodal content items like {"type": "image_url", "image_url": {"url": "..."}}
["claude-3.5-sonnet"]
model = "anthropic/claude-3.5-sonnet"
base_url = "https://openrouter.ai"
endpoint = "/api/v1/chat/completions"
api_key = "sk-"
enable_vision = true
["claude-3.7-sonnet"]
model = "anthropic/claude-3.7-sonnet"
base_url = "https://openrouter.ai"
endpoint = "/api/v1/chat/completions"
api_key = "sk-"
enable_vision = true
[deepseek-chat]
model = "deepseek-chat"
base_url = "https://api.deepseek.com"
endpoint = "/v1/chat/completions"
api_key = "sk-"
[deepseek-r1]
model = "deepseek-reasoner"
base_url = "https://api.deepseek.com"
endpoint = "/v1/chat/completions"
api_key = "sk-"
enable_tool = false # The API provider does not support tool use; fall back to the built-in tool-hint template.

View File

@ -0,0 +1,23 @@
[python_execute]
type = "sse"
host = "localhost"
port = 8895
sse_endpoint = "/sse"
message_enpoint = "/message"
[puppeteer]
type = "stdio"
command = "npx"
args = ["-y", "@modelcontextprotocol/server-puppeteer"]
[playwright]
type = "stdio"
command = "npx"
args = ["-y", "@executeautomation/playwright-mcp-server"]
[filesystem]
type = "stdio"
command = "npx"
args = ["-y",
"@modelcontextprotocol/server-filesystem",
"/Users/{Username}/Desktop"] # Allowed paths

View File

@ -0,0 +1,19 @@
[default]
max_messages = 16 # Maximum number of messages in short-term memory
max_tokens_message = 32768 # Maximum number of tokens in single message
max_tokens_messages = 65536 # Maximum number of tokens in short-term memory
max_tokens_context = 131072 # Maximum number of tokens in context (used by `get_messages`)
retrieval_limit = 32 # Maximum number of results to retrieve from long-term memory
embedding_model = "qwen-text-embedding-v3" # Key in config_embd.toml
vector_store = "hnswlib" # Key in config_vec.toml
llm = "qwen-max-latest" # Key in config_llm.toml
[long-context]
max_messages = 32
max_tokens_message = 64000
max_tokens_messages = 128000
max_tokens_context = 128000
retrieval_limit = 32
embedding_model = "qwen-text-embedding-v3"
vector_store = "hnswlib"
llm = "qwen-max-latest"

View File

@ -0,0 +1,8 @@
[hnswlib]
provider = "hnswlib"
dim = 768 # Dimension of the elements
max_elements = 100 # Maximum number of elements, should be known beforehand
M = 16 # Tightly connected with internal dimensionality of the data
# strongly affects the memory consumption
ef_construction = 200 # Controls index search speed/build speed tradeoff
metric = "L2" # Distance metric to use, can be L2 or IP

View File

@ -56,14 +56,14 @@ int main() {
const auto& config_table = *config_data["humanus_cli"].as_table();
Humanus agent = Humanus::load_from_toml(config_table);
auto agent = std::make_shared<Humanus>(Humanus::load_from_toml(config_table));
while (true) {
if (agent.current_step == agent.max_steps) {
std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
if (agent->current_step == agent->max_steps) {
std::cout << "Automatically paused after " << agent->max_steps << " steps." << std::endl;
std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
std::cout.flush();
agent.reset(false);
agent->reset(false);
} else {
std::cout << "Enter your prompt (or 'exit' to quit): ";
std::cout.flush();
@ -77,7 +77,7 @@ int main() {
}
logger->info("Processing your request: " + prompt);
agent.run(prompt);
agent->run(prompt);
}
return 0;

View File

@ -57,30 +57,26 @@ int main() {
const auto& config_table = *config_data["humanus_plan"].as_table();
Humanus agent = Humanus::load_from_toml(config_table);
auto agent = std::make_shared<Humanus>(Humanus::load_from_toml(config_table));
std::map<std::string, std::shared_ptr<BaseAgent>> agents;
agents["default"] = std::make_shared<Humanus>(agent);
agents["default"] = agent;
auto flow = FlowFactory::create_flow(
FlowType::PLANNING,
nullptr, // llm
nullptr, // planning_tool
std::vector<std::string>{}, // executor_keys
"", // active_plan_id
agents, // agents
std::vector<std::shared_ptr<BaseTool>>{}, // tools
agent->llm,
agents,
"default" // primary_agent_key
);
while (true) {
if (agent.current_step == agent.max_steps) {
std::cout << "Automatically paused after " << agent.current_step << " steps." << std::endl;
if (agent->current_step == agent->max_steps) {
std::cout << "Automatically paused after " << agent->current_step << " steps." << std::endl;
std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
agent.reset(false);
} else if (agent.state != AgentState::IDLE) {
agent->reset(false);
} else if (agent->state != AgentState::IDLE) {
std::cout << "Enter your prompt (enter an empty line to retry or 'exit' to quit): ";
agent.reset(false);
agent->reset(false);
} else {
std::cout << "Enter your prompt (or 'exit' to quit): ";
}
@ -94,6 +90,6 @@ int main() {
logger->info("Processing your request: " + prompt);
auto result = flow->execute(prompt);
logger->info("🌟 " + agent.name + "'s summary: " + result);
logger->info("🌟 " + agent->name + "'s summary: " + result);
}
}

View File

@ -169,6 +169,8 @@ int main(int argc, char** argv) {
agent->reset();
session_manager->clear_result(session_id);
std::thread([agent, session_manager, prompt, session_id]() {
try {
session_sink->set_session_id(session_id);
@ -234,7 +236,7 @@ int main(int argc, char** argv) {
{"max_steps", agent->max_steps},
{"prompt_tokens", agent->get_prompt_tokens()},
{"completion_tokens", agent->get_completion_tokens()},
{"logs_buffer", session_sink->get_buffer(session_id)},
{"log_buffer", session_sink->get_buffer(session_id)},
{"result", result}
};

View File

@ -17,19 +17,18 @@ const std::map<FlowType, std::string> FLOW_TYPE_MAP = {
// Base class for execution flows supporting multiple agents
struct BaseFlow {
std::map<std::string, std::shared_ptr<BaseAgent>> agents;
std::vector<std::shared_ptr<BaseTool>> tools;
std::string primary_agent_key;
BaseFlow(const std::map<std::string, std::shared_ptr<BaseAgent>>& agents = {}, const std::vector<std::shared_ptr<BaseTool>>& tools = {}, const std::string& primary_agent_key = "")
: agents(agents), tools(tools), primary_agent_key(primary_agent_key) {
BaseFlow(const std::map<std::string, std::shared_ptr<BaseAgent>>& agents = {}, const std::string& primary_agent_key = "")
: agents(agents), primary_agent_key(primary_agent_key) {
// If primary agent not specified, use first agent
if (primary_agent_key.empty() && !agents.empty()) {
this->primary_agent_key = agents.begin()->first;
}
}
BaseFlow(const std::shared_ptr<BaseAgent>& agent, const std::vector<std::shared_ptr<BaseTool>>& tools = {}, const std::string& primary_agent_key = "")
: tools(tools), primary_agent_key(primary_agent_key) {
BaseFlow(const std::shared_ptr<BaseAgent>& agent, const std::string& primary_agent_key = "")
: primary_agent_key(primary_agent_key) {
agents["default"] = agent;
// If primary agent not specified, use first agent
if (primary_agent_key.empty()) {
@ -37,8 +36,8 @@ struct BaseFlow {
}
}
BaseFlow(const std::vector<std::shared_ptr<BaseAgent>>& agents_list, const std::vector<std::shared_ptr<BaseTool>>& tools = {}, const std::string& primary_agent_key = "")
: tools(tools), primary_agent_key(primary_agent_key) {
BaseFlow(const std::vector<std::shared_ptr<BaseAgent>>& agents_list, const std::string& primary_agent_key = "")
: primary_agent_key(primary_agent_key) {
for (size_t i = 0; i < agents_list.size(); i++) {
agents["agent_" + std::to_string(i)] = agents_list[i];
}

View File

@ -10,13 +10,6 @@ std::shared_ptr<BaseAgent> PlanningFlow::get_executor(const std::string& step_ty
return agents.at(step_type);
}
// Otherwise use the first available executor or fall back to primary agent
for (const auto& key : executor_keys) {
if (agents.find(key) != agents.end()) {
return agents.at(key);
}
}
// Fallback to primary agent
return primary_agent();
}
@ -140,10 +133,15 @@ void PlanningFlow::_create_initial_plan(const std::string& request) {
logger->warn("Creating default plan");
// Create default plan using the ToolCollection
auto title = request;
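// Truncate long titles at 50 bytes, backing off with validate_utf8 so a multi-byte UTF-8 character is not split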
if (title.size() > 50) {
title = title.substr(0, validate_utf8(title.substr(0, 50))) + "...";
}
planning_tool->execute({
{"command", "create"},
{"plan_id", active_plan_id},
{"title", request.substr(0, std::min(50, static_cast<int>(request.size()))) + (request.size() > 50 ? "..." : "")},
{"title", title},
{"steps", {"Analyze request", "Execute task", "Verify results"}}
});
}

View File

@ -16,37 +16,20 @@ namespace humanus {
struct PlanningFlow : public BaseFlow {
std::shared_ptr<LLM> llm;
std::shared_ptr<PlanningTool> planning_tool;
std::vector<std::string> executor_keys;
std::string active_plan_id;
int current_step_index = -1;
int current_step_index;
PlanningFlow(
const std::shared_ptr<LLM>& llm = nullptr,
const std::shared_ptr<PlanningTool>& planning_tool = nullptr,
const std::vector<std::string>& executor_keys = {},
const std::string& active_plan_id = "",
const std::map<std::string, std::shared_ptr<BaseAgent>>& agents = {},
const std::vector<std::shared_ptr<BaseTool>>& tools = {},
const std::string& primary_agent_key = ""
) : BaseFlow(agents, tools, primary_agent_key),
llm(llm),
planning_tool(planning_tool),
executor_keys(executor_keys),
active_plan_id(active_plan_id) {
const std::string& primary_agent_key = "default"
) : BaseFlow(agents, primary_agent_key),
llm(llm) {
if (!llm) {
this->llm = LLM::get_instance("default");
}
if (!planning_tool) {
this->planning_tool = std::make_shared<PlanningTool>();
}
if (active_plan_id.empty()) {
this->active_plan_id = "plan_" + std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
}
if (executor_keys.empty()) {
for (const auto& [key, agent] : agents) {
this->executor_keys.push_back(key);
}
this->llm = primary_agent()->llm;
}
planning_tool = std::make_shared<PlanningTool>();
reset();
}
// Get an appropriate executor agent for the current step.

View File

@ -1,6 +1,7 @@
#ifndef HUMANUS_UTILS_H
#define HUMANUS_UTILS_H
#include "mcp_message.h"
#include <filesystem>
#include <iostream>
@ -14,6 +15,8 @@
namespace humanus {
using json = mcp::json;
// Get project root directory
inline std::filesystem::path get_project_root() {
return std::filesystem::path(__FILE__).parent_path().parent_path();
@ -28,6 +31,9 @@ size_t validate_utf8(const std::string& text);
bool readline_utf8(std::string & line, bool multiline_input = false);
// Parse the content of a message to a string
std::string parse_json_content(const json& content);
} // namespace humanus
#endif

View File

@ -78,6 +78,12 @@ json LLM::format_messages(const std::vector<Message>& messages) {
formatted_messages.erase(formatted_messages.begin() + j + 1, formatted_messages.end());
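// Some APIs (e.g. deepseek-chat) cannot handle array-style content, so flatten it to a single string when vision is disabled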
if (!llm_config_->enable_vision) {
for (auto& message : formatted_messages) {
message["content"] = parse_json_content(message["content"]); // Images will be replaced by [image1], [image2], ...
}
}
return formatted_messages;
}

View File

@ -67,4 +67,24 @@ bool readline_utf8(std::string & line, bool multiline_input) {
return multiline_input;
}
// Parse the content of a message to a string
std::string parse_json_content(const json& content) {
if (content.is_string()) {
return content.get<std::string>();
} else if (content.is_array()) {
std::string result;
int image_cnt = 0;
for (const auto& item : content) {
if (item["type"] == "text") {
result += item["text"].get<std::string>();
} else if (item["type"] == "image_url") {
result += "[image" + std::to_string(++image_cnt) + "]";
}
}
return result;
} else {
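// Fallback: dump any other JSON value as formatted text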
return content.dump(2);
}
}
} // namespace humanus

View File

@ -3,6 +3,7 @@
#include "schema.h"
#include "config.h"
#include "utils.h"
#include "mcp_stdio_client.h"
#include "mcp_sse_client.h"
#include <string>
@ -51,24 +52,6 @@ struct ToolResult {
};
}
static std::string parse_json_content(const json& content) {
if (content.is_string()) {
return content.get<std::string>();
} else if (content.is_array()) {
std::string result;
for (const auto& item : content) {
if (item["type"] == "text") {
result += item["text"].get<std::string>();
} else if (item["type"] == "image_url") {
result += "<image>" + item["image_url"]["url"].get<std::string>() + "</image>";
}
}
return result;
} else {
return content.dump(2);
}
}
std::string to_string(int max_length = -1) const {
std::string result;
if (!error.empty()) {