diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2063d97..6b41c88 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,9 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 endif()
 
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
 option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
 
 if (WIN32)
@@ -21,7 +24,7 @@ if (MSVC)
     add_compile_options("$<$:/utf-8>")
     add_compile_options("$<$:/bigobj>")
     add_compile_options("$<$:/bigobj>")
-    add_compile_options(/wd4244 /wd4267)
+    add_compile_options(/wd4244 /wd4267) # possible loss of data
 endif()
 
 # Set C++ standard
@@ -78,7 +81,7 @@ file(GLOB FLOW_SOURCES
     "flow/*.cc"
 )
 
-add_executable(humanus_cpp
+add_executable(humanus_cli
     main.cpp
     config.cpp
     llm.cpp
@@ -90,9 +93,7 @@ add_executable(humanus_cpp
     ${FLOW_SOURCES}
 )
 
-target_link_libraries(humanus_cpp PRIVATE Threads::Threads mcp ${OPENSSL_LIBRARIES})
+target_link_libraries(humanus_cli PRIVATE Threads::Threads mcp ${OPENSSL_LIBRARIES})
 
 if(Python3_FOUND)
-    target_link_libraries(humanus_cpp PRIVATE ${Python3_LIBRARIES})
-endif()
-
-install(TARGETS humanus_cpp DESTINATION bin)
\ No newline at end of file
+    target_link_libraries(humanus_cli PRIVATE ${Python3_LIBRARIES})
+endif()
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7b6e912
--- /dev/null
+++ b/README.md
@@ -0,0 +1,24 @@
+## Introduction
+
+Humanus (meaning "human" in Latin) is a lightweight framework inspired by OpenManus, integrated with the Model Context Protocol (MCP). `humanus.cpp` enables more flexible tool choices, and provides a foundation for building powerful local LLM agents.
+
+Let's embrace local LLM agents w/ Humanus!
+
+## Features
+
+## How to Build
+
+```bash
+cmake -B build
+cmake --build build --config Release
+```
+
+## How to Run
+
+```bash
+./build/bin/humanus_cli # Unix/MacOS
+
+# Or
+.\build\bin\Release\humanus_cli.exe # Windows
+```
diff --git a/agent/base.h b/agent/base.h
index 3ec1cbd..56ff4a9 100644
--- a/agent/base.h
+++ b/agent/base.h
@@ -1,9 +1,9 @@
 #ifndef HUMANUS_AGENT_BASE_H
 #define HUMANUS_AGENT_BASE_H
 
-#include "../llm.h"
-#include "../schema.h"
-#include "../logger.h"
+#include "llm.h"
+#include "schema.h"
+#include "logger.h"
 #include 
 #include 
 #include 
@@ -37,6 +37,8 @@ struct BaseAgent : std::enable_shared_from_this<BaseAgent> {
 
     int duplicate_threshold; // Threshold for duplicate messages
 
+    std::string current_request; // Current request from user
+
     BaseAgent(
         const std::string& name,
         const std::string& description,
@@ -67,7 +69,7 @@ struct BaseAgent : std::enable_shared_from_this<BaseAgent> {
             llm = LLM::get_instance("default");
         }
         if (!memory) {
-            memory = std::make_shared<Memory>();
+            memory = std::make_shared<Memory>(max_steps);
         }
     }
 
@@ -89,6 +91,8 @@ struct BaseAgent : std::enable_shared_from_this<BaseAgent> {
 
     // Execute the agent's main loop asynchronously
     virtual std::string run(const std::string& request = "") {
+        current_request = request;
+
         if (state != AgentState::IDLE) {
             throw std::runtime_error("Cannot run agent from state " + agent_state_map[state]);
         }
@@ -121,7 +125,7 @@
         if (current_step >= max_steps) {
             results.push_back("Terminated: Reached max steps (" + std::to_string(max_steps) + ")");
         }
-        if (state == AgentState::ERR) {
+        if (state != AgentState::FINISHED) {
             results.push_back("Terminated: Agent state is " + agent_state_map[state]);
         } else {
             state = AgentState::IDLE; // FINISHED -> IDLE
@@ -151,6 +155,9 @@
         std::string stuck_prompt = "\
Observed duplicate responses. Consider new strategies and avoid repeating ineffective paths already attempted.";
         next_step_prompt = stuck_prompt + "\n" + next_step_prompt;
+        if (!current_request.empty()) {
+            next_step_prompt += "\nAnd don't forget your current task: " + current_request;
+        }
         logger->warn("Agent detected stuck state. Added prompt: " + stuck_prompt);
     }
 
@@ -182,6 +189,14 @@
         return duplicate_count >= duplicate_threshold;
     }
 
+    void reset(bool reset_memory = true) {
+        current_step = 0;
+        state = AgentState::IDLE;
+        if (reset_memory) {
+            memory->clear();
+        }
+    }
+
     void set_messages(const std::vector<Message>& messages) {
         memory->add_messages(messages);
     }
diff --git a/agent/manus.h b/agent/humanus.h
similarity index 73%
rename from agent/manus.h
rename to agent/humanus.h
index 2221091..fef5f62 100644
--- a/agent/manus.h
+++ b/agent/humanus.h
@@ -1,14 +1,14 @@
-#ifndef HUMANUS_AGENT_MANUS_H
-#define HUMANUS_AGENT_MANUS_H
+#ifndef HUMANUS_AGENT_HUMANUS_H
+#define HUMANUS_AGENT_HUMANUS_H
 
 #include "base.h"
 #include "toolcall.h"
-#include "../prompt.h"
-#include "../tool/tool_collection.h"
-#include "../tool/python_execute.h"
-#include "../tool/terminate.h"
-#include "../tool/puppeteer.h"
-#include "../tool/filesystem.h"
+#include "prompt.h"
+#include "tool/tool_collection.h"
+#include "tool/python_execute.h"
+#include "tool/terminate.h"
+#include "tool/puppeteer.h"
+#include "tool/filesystem.h"
 
 namespace humanus {
 
 /**
  * A versatile general-purpose agent that uses planning to solve various tasks.
  * including Python execution, web browsing, file operations, and information retrieval
  * to handle a wide range of user requests.
  */
-struct Manus : ToolCallAgent {
-    Manus(
+struct Humanus : ToolCallAgent {
+    Humanus(
         const ToolCollection& available_tools = ToolCollection(
             // Add general-purpose tools to the tool collection
             {
                 std::make_shared<PythonExecute>(),
                 std::make_shared<Puppeteer>(), // for web browsing
-                std::make_shared<FileSystem>(),
+                std::make_shared<Filesystem>(),
                 std::make_shared<Terminate>()
             }
         ),
         const std::string& tool_choice = "auto",
         const std::set<std::string>& special_tool_names = {"terminate"},
-        const std::string& name = "manus",
+        const std::string& name = "humanus",
         const std::string& description = "A versatile agent that can solve various tasks using multiple tools",
-        const std::string& system_prompt = prompt::manus::SYSTEM_PROMPT,
-        const std::string& next_step_prompt = prompt::manus::NEXT_STEP_PROMPT,
+        const std::string& system_prompt = prompt::humanus::SYSTEM_PROMPT,
+        const std::string& next_step_prompt = prompt::humanus::NEXT_STEP_PROMPT,
         const std::shared_ptr<LLM>& llm = nullptr,
         const std::shared_ptr<Memory>& memory = nullptr,
         AgentState state = AgentState::IDLE,
@@ -60,4 +60,4 @@
 
 }
 
-#endif // HUMANUS_AGENT_MANUS_H
+#endif // HUMANUS_AGENT_HUMANUS_H
diff --git a/agent/planning.cpp b/agent/planning.cpp
index d6e390d..ad04cbf 100644
--- a/agent/planning.cpp
+++ b/agent/planning.cpp
@@ -16,9 +16,7 @@ void PlanningAgent::initialize_plan_and_verify_tools() {
 bool PlanningAgent::think() {
     std::string prompt;
     if (!active_plan_id.empty()) {
-        prompt = "CURRENT PLAN STATUS:\n" + get_plan() + "\n\n" + next_step_prompt;
-    } else {
-        prompt = next_step_prompt;
+        prompt = "CURRENT PLAN STATUS:\n" + get_plan();
     }
 
     memory->add_message(Message::user_message(prompt));
@@ -206,7 +204,8 @@ void PlanningAgent::create_initial_plan(const std::string& request) {
     memory->add_messages(messages);
 
     json response = llm->ask_tool(
         messages,
-        {Message::system_message(system_prompt)},
+        system_prompt,
+        next_step_prompt,
         available_tools.to_params(),
         tool_choice
     );
diff --git a/agent/planning.h b/agent/planning.h
index 10595cb..32479e3 100644
--- a/agent/planning.h
+++ b/agent/planning.h
@@ -2,8 +2,8 @@
 #define HUMANUS_AGENT_PLANNING_H
 
 #include "toolcall.h"
-#include "../tool/planning.h"
-#include "../prompt.h"
+#include "tool/planning.h"
+#include "prompt.h"
 
 namespace humanus {
diff --git a/agent/swe.h b/agent/swe.h
index 31db206..a342fc1 100644
--- a/agent/swe.h
+++ b/agent/swe.h
@@ -2,11 +2,11 @@
 #define HUMANUS_AGENT_SWE_H
 
 #include "toolcall.h"
-#include "../tool/tool_collection.h"
-#include "../tool/terminate.h"
-#include "../tool/shell.h"
-#include "../tool/filesystem.h"
-#include "../prompt.h"
+#include "tool/tool_collection.h"
+#include "tool/terminate.h"
+#include "tool/shell.h"
+#include "tool/filesystem.h"
+#include "prompt.h"
 
 namespace humanus {
 
@@ -19,7 +19,7 @@ struct SweAgent : ToolCallAgent {
         const ToolCollection& available_tools = ToolCollection(
             {
                 std::make_shared<Shell>(),
-                std::make_shared<FileSystem>(),
+                std::make_shared<Filesystem>(),
                 std::make_shared<Terminate>()
             }
         ),
diff --git a/agent/toolcall.cpp b/agent/toolcall.cpp
index 78624a5..308115a 100644
--- a/agent/toolcall.cpp
+++ b/agent/toolcall.cpp
@@ -4,15 +4,11 @@ namespace humanus {
 
 // Process current state and decide next actions using tools
 bool ToolCallAgent::think() {
-    if (!next_step_prompt.empty()) {
-        auto user_msg = Message::user_message(next_step_prompt);
-        memory->add_message(user_msg);
-    }
-
     // Get response with tool options
     auto response = llm->ask_tool(
         memory->messages,
-        system_prompt.empty() ? std::vector<Message>{} : std::vector<Message>{Message::system_message(system_prompt)},
+        system_prompt,
+        next_step_prompt,
         available_tools.to_params(),
         tool_choice
     );
@@ -76,7 +72,7 @@ bool ToolCallAgent::think() {
 std::string ToolCallAgent::act() {
     if (tool_calls.empty()) {
         if (tool_choice == "required") {
-            throw std::runtime_error(TOOL_CALL_REQUIRED);
+            throw std::runtime_error("Required tools but none selected");
         }
 
         // Return last message content if no tool calls
@@ -87,7 +83,7 @@ std::string ToolCallAgent::act() {
     for (const auto& tool_call : tool_calls) {
         auto result = execute_tool(tool_call);
         logger->info(
-            "🎯 Tool '" + tool_call.function.name + "' completed its mission! Result: " + result
+            "🎯 Tool '" + tool_call.function.name + "' completed its mission! Result: " + result.substr(0, 500) + (result.size() > 500 ? "..." : "")
         );
 
         // Add tool response to memory
diff --git a/agent/toolcall.h b/agent/toolcall.h
index a112400..894d276 100644
--- a/agent/toolcall.h
+++ b/agent/toolcall.h
@@ -2,10 +2,10 @@
 #define HUMANUS_AGENT_TOOLCALL_H
 
 #include "react.h"
-#include "../prompt.h"
-#include "../tool/tool_collection.h"
-#include "../tool/create_chat_completion.h"
-#include "../tool/terminate.h"
+#include "prompt.h"
+#include "tool/tool_collection.h"
+#include "tool/create_chat_completion.h"
+#include "tool/terminate.h"
 
 namespace humanus {
diff --git a/config.cpp b/config.cpp
index 9942d5e..cb21aef 100644
--- a/config.cpp
+++ b/config.cpp
@@ -17,9 +17,11 @@ void Config::_load_initial_config() {
 
         const auto& data = toml::parse_file(config_path.string());
 
-        // Check if tool configuration exists
+        // Load LLM configuration
+
+        // Check if llm configuration exists
         if (!data.contains("llm") || !data["llm"].is_table()) {
-            throw std::runtime_error("找不到llm配置: ");
+            throw std::runtime_error("Config file does not contain `llm` table");
         }
 
         const auto& llm_table = *data["llm"].as_table();
@@ -52,23 +54,43 @@ void Config::_load_initial_config() {
             llm_settings.max_tokens = llm_table["max_tokens"].as_integer()->get();
         }
 
+        if (llm_table.contains("timeout") && llm_table["timeout"].is_integer()) {
+            llm_settings.timeout = llm_table["timeout"].as_integer()->get();
+        }
+
         if (llm_table.contains("temperature") && llm_table["temperature"].is_floating_point()) {
             llm_settings.temperature = llm_table["temperature"].as_floating_point()->get();
         }
 
+        if (llm_table.contains("oai_tool_support") && llm_table["oai_tool_support"].is_boolean()) {
+            llm_settings.oai_tool_support = llm_table["oai_tool_support"].as_boolean()->get();
+        }
+
         _config.llm["default"] = llm_settings;
+
+        // Load tool helper configurations
+        ToolHelper tool_helper;
+        if (data.contains("tool_helper") && data["tool_helper"].is_table()) {
+            const auto& tool_helper_table = *data["tool_helper"].as_table();
+
+            if (tool_helper_table.contains("tool_start")) {
+                tool_helper.tool_start = tool_helper_table["tool_start"].as_string()->get();
+            }
+
+            if (tool_helper_table.contains("tool_end")) {
+                tool_helper.tool_end = tool_helper_table["tool_end"].as_string()->get();
+            }
+
+            if (tool_helper_table.contains("tool_hint_template")) {
+                tool_helper.tool_hint_template = tool_helper_table["tool_hint_template"].as_string()->get();
+            }
+        }
+        _config.tool_helper["default"] = tool_helper;
     } catch (const std::exception& e) {
-        std::cerr << "加载配置文件失败: " << e.what() << std::endl;
+        std::cerr << "Loading config file failed: " << e.what() << std::endl;
         // Set default configuration
-        LLMSettings default_settings;
-        default_settings.model = "gpt-3.5-turbo";
-        default_settings.api_key = "sk-";
-        default_settings.base_url = "https://api.openai.com";
-        default_settings.end_point = "/v1/chat/completions";
-        default_settings.max_tokens = 4096;
-        default_settings.temperature = 1.0;
-
-        _config.llm["default"] = default_settings;
+        _config.llm["default"] = LLMSettings();
+        _config.tool_helper["default"] = ToolHelper();
     }
 }
diff --git a/config.h b/config.h
index 15f507d..6aeaa2b 100644
--- a/config.h
+++ b/config.h
@@ -10,6 +10,7 @@
 #include 
 
 #include "schema.h"
+#include "prompt.h"
 
 namespace humanus {
 
@@ -18,9 +19,7 @@ static std::filesystem::path get_project_root() {
     return std::filesystem::path(__FILE__).parent_path();
 }
 
-// Windows环境下使用静态变量
 static const std::filesystem::path PROJECT_ROOT = get_project_root();
-static const std::filesystem::path WORKSPACE_ROOT = PROJECT_ROOT / "workspace";
 
 struct LLMSettings {
     std::string model;
@@ -28,17 +27,21 @@ struct LLMSettings {
     std::string base_url;
     std::string end_point;
     int max_tokens;
+    int timeout;
     double temperature;
+    bool oai_tool_support;
 
     LLMSettings(
-        const std::string& model = "",
-        const std::string& api_key = "",
-        const std::string& base_url = "",
-        const std::string& end_point = "/chat/completions",
+        const std::string& model = "deepseek-chat",
+        const std::string& api_key = "sk-",
+        const std::string& base_url = "https://api.deepseek.com",
+        const std::string& end_point = "/v1/chat/completions",
         int max_tokens = 4096,
-        double temperature = 1.0
+        int timeout = 60,
+        double temperature = 1.0,
+        bool oai_tool_support = true
     ) : model(model), api_key(api_key), base_url(base_url), end_point(end_point),
-        max_tokens(max_tokens), temperature(temperature) {}
+        max_tokens(max_tokens), timeout(timeout), temperature(temperature), oai_tool_support(oai_tool_support) {}
 
     json to_json() const {
         json j;
@@ -52,8 +55,107 @@ struct LLMSettings {
     }
 };
 
+struct ToolHelper {
+    std::string tool_start;
+    std::string tool_end;
+    std::string tool_hint_template;
+
+    ToolHelper(const std::string& tool_start = "", const std::string& tool_end = "", const std::string& tool_hint_template = prompt::toolcall::TOOL_HINT_TEMPLATE)
+        : tool_start(tool_start), tool_end(tool_end), tool_hint_template(tool_hint_template) {}
+
+    static ToolHelper get_instance() {
+        static ToolHelper instance;
+        return instance;
+    }
+
+    static std::string str_replace(std::string& str, const std::string& from, const std::string& to) {
+        size_t start_pos = 0;
+        while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
+            str.replace(start_pos, from.length(), to);
+            start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx'
+        }
+        return str;
+    }
+
+    std::string hint(const std::string& tool_list) const {
+        std::string hint_str = tool_hint_template;
+        hint_str = str_replace(hint_str, "{tool_start}", tool_start);
+        hint_str = str_replace(hint_str, "{tool_end}", tool_end);
+        hint_str = str_replace(hint_str, "{tool_list}", tool_list);
+        return hint_str;
+    }
+
+    json parse(const std::string& content) const {
+        std::string new_content = content;
+        json tool_calls = json::array();
+
+        size_t pos_start = new_content.find(tool_start);
+        size_t pos_end = pos_start == std::string::npos ? std::string::npos : new_content.find(tool_end, pos_start + tool_start.size());
+
+        if (pos_start != std::string::npos && pos_end == std::string::npos) { // Some might not have tool_end
+            pos_end = new_content.size();
+        }
+
+        while (pos_start != std::string::npos) {
+            std::string tool_content = new_content.substr(pos_start + tool_start.size(), pos_end - pos_start - tool_start.size());
+
+            if (!tool_content.empty()) {
+                try {
+                    tool_calls.push_back({
+                        {"type", "function"},
+                        {"function", json::parse(tool_content)}
+                    });
+                    tool_calls.back()["id"] = "call_" + std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
+                } catch (const json::exception& e) {
+                    throw std::runtime_error("Invalid tool call: " + tool_content);
+                }
+            }
+
+            auto trim = [](const std::string& str) -> std::string {
+                auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
+
+                auto start = std::find_if(str.begin(), str.end(), not_space);
+                auto end = std::find_if(str.rbegin(), str.rend(), not_space).base();
+
+                if (start >= end) return "";
+                return std::string(start, end);
+            };
+
+            std::string lhs = trim(new_content.substr(0, pos_start));
+            std::string rhs = trim(new_content.substr(std::min(pos_end + tool_end.size(), new_content.size())));
+
+            new_content = lhs + rhs;
+
+            pos_start = new_content.find(tool_start, pos_start); // Previous tool_call has been cut off
+            pos_end = pos_start == std::string::npos ? std::string::npos : new_content.find(tool_end, pos_start + tool_start.size());
+            if (pos_start != std::string::npos && pos_end == std::string::npos) { // Some might not have tool_end
+                pos_end = new_content.size();
+            }
+        }
+
+        return {
+            {"content", new_content},
+            {"tool_calls", tool_calls} // Might be empty if no tool calls found
+        };
+    }
+
+    json dump(const json& tool_calls) const {
+        std::string content;
+        if (!tool_calls.is_array()) {
+            throw std::runtime_error("Tool calls should be an array");
+        }
+        for (const auto& tool_call : tool_calls) {
+            content += tool_start;
+            content += tool_call[tool_call["type"]].dump(2);
+            content += tool_end;
+        }
+        return content;
+    }
+};
+
 struct AppConfig {
     std::map<std::string, LLMSettings> llm;
+    std::map<std::string, ToolHelper> tool_helper;
 };
 
 class Config {
@@ -115,6 +217,14 @@ public:
     const std::map<std::string, LLMSettings>& llm() const {
         return _config.llm;
     }
+
+    /**
+     * @brief Get the tool helpers
+     * @return The tool helpers map
+     */
+    const std::map<std::string, ToolHelper>& tool_helper() const {
+        return _config.tool_helper;
+    }
 
     /**
      * @brief Get the app config
diff --git a/config/config.toml b/config/config.toml
index febf9bb..9144f6b 100644
--- a/config/config.toml
+++ b/config/config.toml
@@ -1,6 +1,7 @@
 [llm]
-model = "anthropic/claude-3.7-sonnet"
-base_url = "https://openrouter.ai"
-end_point = "/api/v1/chat/completions"
-api_key = "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"
-max_tokens = 8192
\ No newline at end of file
+model = "deepseek-reasoner"
+base_url = "https://api.deepseek.com"
+end_point = "/v1/chat/completions"
+api_key = "sk-93c5bfcb920c4a8aa345791d429b8536"
+max_tokens = 8192
+oai_tool_support = false
\ No newline at end of file
diff --git a/config/config.toml.bak b/config/config.toml.bak
index 87abe58..8a12cb0 100644
--- a/config/config.toml.bak
+++ b/config/config.toml.bak
@@ -10,4 +10,11 @@ model = "deepseek-chat"
 base_url = "https://api.deepseek.com"
 end_point = "/v1/chat/completions"
 api_key = "sk-93c5bfcb920c4a8aa345791d429b8536"
+max_tokens = 8192
+
+[llm]
+model = "qwen-max"
+base_url = "https://dashscope.aliyuncs.com"
+end_point = "/compatible-mode/v1/chat/completions"
+api_key = "sk-cb1bb2a240d84182bb93f6dd0fe03600"
 max_tokens = 8192
\ No newline at end of file
diff --git a/flow/base.h b/flow/base.h
index 4208a9e..0689c4b 100644
--- a/flow/base.h
+++ b/flow/base.h
@@ -1,8 +1,8 @@
 #ifndef HUMANUS_FLOW_BASE_H
 #define HUMANUS_FLOW_BASE_H
 
-#include "../tool/base.h"
-#include "../agent/base.h"
+#include "tool/base.h"
+#include "agent/base.h"
 
 namespace humanus {
diff --git a/flow/flow_factory.h b/flow/flow_factory.h
index a965b97..489caa4 100644
--- a/flow/flow_factory.h
+++ b/flow/flow_factory.h
@@ -2,7 +2,7 @@
 #define HUMANUS_FLOW_FACTORY_H
 
 #include "base.h"
-#include "../agent/base.h"
+#include "agent/base.h"
 #include "planning.h"
 
 namespace humanus {
diff --git a/flow/planning.cpp b/flow/planning.cpp
index 80459d0..bf322ad 100644
--- a/flow/planning.cpp
+++ b/flow/planning.cpp
@@ -61,6 +61,9 @@ std::string PlanningFlow::execute(const std::string& input) {
             if (executor->state == AgentState::FINISHED || executor->state == AgentState::ERR) {
                 break;
             }
+
+            // Refactor memory
+            executor->reset(true); // TODO: More fine-grained memory reset?
         }
 
         return result;
@@ -75,9 +78,7 @@ void PlanningFlow::_create_initial_plan(const std::string& request) {
     logger->info("Creating initial plan with ID: " + active_plan_id);
 
     // Create a system message for plan creation
-    Message system_message = Message::system_message(
-        "You are a planning assistant. Your task is to create a detailed plan with clear steps."
-    );
+    std::string system_prompt = "You are a planning assistant. Your task is to create a detailed plan with clear steps.";
 
     // Create a user message with the request
     Message user_message = Message::user_message(
@@ -87,8 +88,9 @@ void PlanningFlow::_create_initial_plan(const std::string& request) {
     // Call LLM with PlanningTool
     auto response = llm->ask_tool(
         {user_message},
-        {system_message},
-        {planning_tool->to_param()},
+        system_prompt,
+        "", // No next_step_prompt for initial plan creation
+        json::array({planning_tool->to_param()}),
         "required"
     );
@@ -227,7 +229,9 @@ std::string PlanningFlow::_execute_step(const std::shared_ptr<BaseAgent>& execut
     std::string step_result = executor->run(step_prompt);
 
     // Mark the step as completed after successful execution
-    _mark_step_completed();
+    if (executor->state != AgentState::ERR) {
+        _mark_step_completed();
+    }
 
     return step_result;
 } catch (const std::exception& e) {
@@ -374,9 +378,7 @@ std::string PlanningFlow::_finalize_plan() {
 
     // Create a summary using the flow's LLM directly
     try {
-        Message system_message = Message::system_message(
-            "You are a planning assistant. Your task is to summarize the completed plan."
-        );
+        std::string system_prompt = "You are a planning assistant. Your task is to summarize the completed plan.";
 
         Message user_message = Message::user_message(
             "The plan has been completed. Here is the final plan status:\n\n" + plan_text + "\n\n" +
@@ -385,7 +387,7 @@ std::string PlanningFlow::_finalize_plan() {
 
         auto response = llm->ask(
             {user_message},
-            {system_message}
+            system_prompt
         );
 
         return response;
diff --git a/flow/planning.h b/flow/planning.h
index 353a668..1670465 100644
--- a/flow/planning.h
+++ b/flow/planning.h
@@ -2,11 +2,11 @@
 #define HUMANUS_FLOW_PLANNING_H
 
 #include "base.h"
-#include "../agent/base.h"
-#include "../llm.h"
-#include "../logger.h"
-#include "../schema.h"
-#include "../tool/planning.h"
+#include "agent/base.h"
+#include "llm.h"
+#include "logger.h"
+#include "schema.h"
+#include "tool/planning.h"
 #include 
 
 namespace humanus {
diff --git a/llm.h b/llm.h
index e22a550..a6109d4 100644
--- a/llm.h
+++ b/llm.h
@@ -22,20 +22,29 @@ private:
     std::unique_ptr<httplib::Client> client_;
 
     std::shared_ptr<LLMSettings> llm_config_;
+
+    std::shared_ptr<ToolHelper> tool_helper_;
 
 public:
     // Constructor
-    LLM(const std::string& config_name, const std::shared_ptr<LLMSettings>& llm_config = nullptr) : llm_config_(llm_config) {
+    LLM(const std::string& config_name, const std::shared_ptr<LLMSettings>& llm_config = nullptr, const std::shared_ptr<ToolHelper>& tool_helper = nullptr) : llm_config_(llm_config), tool_helper_(tool_helper) {
         if (!llm_config_) {
             if (Config::get_instance().llm().find(config_name) == Config::get_instance().llm().end()) {
-                throw std::invalid_argument("Config not found: " + config_name);
+                throw std::invalid_argument("LLM config not found: " + config_name);
             }
             llm_config_ = std::make_shared<LLMSettings>(Config::get_instance().llm().at(config_name));
         }
+        if (!llm_config_->oai_tool_support && !tool_helper_) {
+            if (Config::get_instance().tool_helper().find(config_name) == Config::get_instance().tool_helper().end()) {
+                throw std::invalid_argument("Tool helper config not found: " + config_name);
+            }
+            tool_helper_ = std::make_shared<ToolHelper>(Config::get_instance().tool_helper().at(config_name));
+        }
         client_ = std::make_unique<httplib::Client>(llm_config_->base_url);
         client_->set_default_headers({
             {"Authorization", "Bearer " + llm_config_->api_key}
         });
+        client_->set_read_timeout(llm_config_->timeout);
     }
 
     // Get the singleton instance
@@ -53,50 +62,72 @@ public:
      * @throws std::invalid_argument If the message format is invalid or missing necessary fields
      * @throws std::runtime_error If the message type is not supported
      */
-    static std::vector<json> format_messages(const std::vector<Message>& messages) {
-        std::vector<json> formatted_messages;
+    json format_messages(const std::vector<Message>& messages) {
+        json formatted_messages = json::array();
+
+        auto concat_content = [](const json& lhs, const json& rhs) -> json {
+            if (lhs.is_string() && rhs.is_string()) {
+                return lhs.get<std::string>() + "\n" + rhs.get<std::string>(); // Maybe other delimiter?
+            }
+            json res = json::array();
+            if (lhs.is_string()) {
+                res.push_back({
+                    {"type", "text"},
+                    {"text", lhs.get<std::string>()}
+                });
+            } else if (lhs.is_array()) {
+                res.insert(res.end(), lhs.begin(), lhs.end());
+            }
+            if (rhs.is_string()) {
+                res.push_back({
+                    {"type", "text"},
+                    {"text", rhs.get<std::string>()}
+                });
+            } else if (rhs.is_array()) {
+                res.insert(res.end(), rhs.begin(), rhs.end());
+            }
+            return res;
+        };
 
         for (const auto& message : messages) {
+            if (message.content.empty() && message.tool_calls.empty()) {
+                continue;
+            }
             formatted_messages.push_back(message.to_json());
+            if (!llm_config_->oai_tool_support) {
+                if (formatted_messages.back()["role"] == "tool") {
+                    std::string tool_results_str = formatted_messages.back().dump(2);
+                    formatted_messages.back() = {
+                        {"role", "user"},
+                        {"content", tool_results_str}
+                    };
+                } else if (!formatted_messages.back()["tool_calls"].empty()) {
+                    if (formatted_messages.back()["content"].is_null()) {
+                        formatted_messages.back()["content"] = "";
+                    }
+                    std::string tool_calls_str = tool_helper_->dump(formatted_messages.back()["tool_calls"]);
+                    formatted_messages.back().erase("tool_calls");
+                    formatted_messages.back()["content"] = concat_content(formatted_messages.back()["content"], tool_calls_str);
+                }
+            }
         }
 
         for (const auto& message : formatted_messages) {
             if (message["role"] != "user" && message["role"] != "assistant" && message["role"] != "system" && message["role"] != "tool") {
                 throw std::invalid_argument("Invalid role: " + message["role"].get<std::string>());
             }
-            if (message["content"].empty() && message["tool_calls"].empty()) {
-                throw std::invalid_argument("Message must contain either 'content' or 'tool_calls'");
+        }
+
+        size_t i = 0, j = -1;
+        for (; i < formatted_messages.size(); i++) {
+            if (i == 0 || formatted_messages[i]["role"] != formatted_messages[j]["role"]) {
+                formatted_messages[++j] = formatted_messages[i];
+            } else {
+                formatted_messages[j]["content"] = concat_content(formatted_messages[j]["content"], formatted_messages[i]["content"]);
             }
         }
 
-        return formatted_messages;
-    }
-
-    /**
-     * @brief Format the message list to the format that LLM can accept
-     * @param messages json object message list
-     * @return The formatted message list
-     * @throws std::invalid_argument If the message format is invalid or missing necessary fields
-     * @throws std::runtime_error If the message type is not supported
-     */
-    static std::vector<json> format_messages(const std::vector<json>& messages) {
-        std::vector<json> formatted_messages;
-
-        for (const auto& message : messages) {
-            if (!message.contains("role")) {
-                throw std::invalid_argument("Message missing necessary field: role");
-            }
-            formatted_messages.push_back(message);
-        }
-
-        for (const auto& message : formatted_messages) {
-            if (message["role"] != "user" && message["role"] != "assistant" && message["role"] != "system" && message["role"] != "tool") {
-                throw std::invalid_argument("Invalid role: " + message["role"].get<std::string>());
-            }
-            if (message["content"].empty() && message["tool_calls"].empty()) {
-                throw std::invalid_argument("Message must contain either 'content' or 'tool_calls'");
-            }
-        }
+        formatted_messages.erase(formatted_messages.begin() + j + 1, formatted_messages.end());
 
         return formatted_messages;
     }
@@ -104,7 +135,7 @@ public:
     /**
      * @brief Send a request to the LLM and get the reply
      * @param messages The conversation message list
-     * @param system_msgs Optional system messages
+     * @param system_prompt Optional system message
     * @param max_retries The maximum number of retries
     * @return The generated assistant content
     * @throws std::invalid_argument If the message is invalid or the reply is empty
    * @throws std::runtime_error If the request fails
    */
    std::string ask(
        const std::vector<Message>& messages,
-        const std::vector<Message>& system_msgs = {},
+        const std::string& system_prompt = "",
        int max_retries = 3
    ) {
-        std::vector<json> formatted_messages;
+        json formatted_messages = json::array();
 
-        if (!system_msgs.empty()) {
-            auto system_formatted_messages = format_messages(system_msgs);
-            formatted_messages.insert(formatted_messages.end(), system_formatted_messages.begin(), system_formatted_messages.end());
+        if (!system_prompt.empty()) {
+            formatted_messages.push_back({
+                {"role", "system"},
+                {"content", system_prompt}
+            });
        }
 
-        auto _formatted_messages = format_messages(messages);
+        json _formatted_messages = format_messages(messages);
        formatted_messages.insert(formatted_messages.end(), _formatted_messages.begin(), _formatted_messages.end());
 
        json body = {
@@ -146,7 +179,7 @@ public:
            try {
                json json_data = json::parse(res->body);
                return json_data["choices"][0]["message"]["content"].get<std::string>();
-            } catch (const std::exception & e) {
+            } catch (const std::exception& e) {
                logger->error("Failed to parse response: " + std::string(e.what()));
            }
        } else {
@@ -171,7 +204,8 @@ public:
    /**
     * @brief Send a request to the LLM with tool functions
     * @param messages The conversation message list
-     * @param system_msgs Optional system messages
+     * @param system_prompt Optional system message
+     * @param next_step_prompt Optional prompt message for the next step
     * @param timeout The request timeout (seconds)
     * @param tools The tool list
     * @param tool_choice The tool choice strategy
@@ -182,32 +216,58 @@ public:
    json ask_tool(
        const std::vector<Message>& messages,
-        const std::vector<Message>& system_msgs = {},
-        const std::vector<json> tools = {},
+        const std::string& system_prompt = "",
+        const std::string& next_step_prompt = "",
+        const json& tools = {},
        const std::string& tool_choice = "auto",
-        int timeout = 60,
        int max_retries = 3
    ) {
        if (tool_choice != "none" && tool_choice != "auto" && tool_choice != "required") {
            throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
        }
 
-        std::vector<json> formatted_messages;
+        json formatted_messages = json::array();
 
-        if (!system_msgs.empty()) {
-            auto system_formatted_messages = format_messages(system_msgs);
-            formatted_messages.insert(formatted_messages.end(), system_formatted_messages.begin(), system_formatted_messages.end());
+        if (!system_prompt.empty()) {
+            formatted_messages.push_back({
+                {"role", "system"},
+                {"content", system_prompt}
+            });
        }
 
-        auto _formatted_messages = format_messages(messages);
+        json _formatted_messages = format_messages(messages);
        formatted_messages.insert(formatted_messages.end(), _formatted_messages.begin(), _formatted_messages.end());
 
+        if (!next_step_prompt.empty()) {
+            if (formatted_messages.empty() || formatted_messages.back()["role"] != "user") {
+                formatted_messages.push_back({
+                    {"role", "user"},
+                    {"content", next_step_prompt}
+                });
+            } else {
+                if (formatted_messages.back()["content"].is_string()) {
+                    formatted_messages.back()["content"] = formatted_messages.back()["content"].get<std::string>() + "\n\n" + next_step_prompt;
+                } else if (formatted_messages.back()["content"].is_array()) {
+                    formatted_messages.back()["content"].push_back({
+                        {"type", "text"},
+                        {"text", next_step_prompt}
+                    });
+                }
+            }
+        }
+
        if (!tools.empty()) {
            for (const json& tool : tools) {
                if (!tool.contains("type")) {
-                    throw std::invalid_argument("Tool must contain 'type' field");
+                    throw std::invalid_argument("Tool must contain 'type' field but got: " + tool.dump(2));
                }
            }
+            if (tool_choice == "required" && tools.empty()) {
+                throw std::invalid_argument("No tool available for required tool choice");
+            }
+            if (!tools.is_array()) {
+                throw std::invalid_argument("Tools must be an array");
+            }
        }
 
        json body = {
@@ -215,11 +275,26 @@ public:
            {"messages", formatted_messages},
            {"temperature", llm_config_->temperature},
            {"max_tokens", llm_config_->max_tokens},
-            {"tools", tools},
            {"tool_choice", tool_choice}
        };
 
-        client_->set_read_timeout(timeout);
+        if (llm_config_->oai_tool_support) {
+            body["tools"] = tools;
+        } else {
+            if (body["messages"].empty() || body["messages"].back()["role"] != "user") {
+                body["messages"].push_back({
+                    {"role", "user"},
+                    {"content", tool_helper_->hint(tools.dump(2))}
+                });
+            } else if (body["messages"].back()["content"].is_string()) {
+                body["messages"].back()["content"] = body["messages"].back()["content"].get<std::string>() + "\n\n" + tool_helper_->hint(tools.dump(2));
+            } else if (body["messages"].back()["content"].is_array()) {
+                body["messages"].back()["content"].push_back({
+                    {"type", "text"},
+                    {"text", tool_helper_->hint(tools.dump(2))}
+                });
+            }
+        }
 
        std::string body_str = body.dump();
 
@@ -234,8 +309,12 @@ public:
        } else if (res->status == 200) {
            try {
                json json_data = json::parse(res->body);
-                return json_data["choices"][0]["message"];
-            } catch (const std::exception & e) {
+                json message = json_data["choices"][0]["message"];
+                if (!llm_config_->oai_tool_support && message["content"].is_string()) {
+                    message = tool_helper_->parse(message["content"].get<std::string>());
+                }
+                return message;
+            } catch (const std::exception& e) {
                logger->error("Failed to parse response: " + std::string(e.what()));
            }
        } else {
diff --git a/main.cpp b/main.cpp
index df64798..97a96d0 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,4 +1,4 @@
-#include "agent/manus.h"
+#include "agent/humanus.h"
 #include "logger.h"
 #include "prompt.h"
 #include "flow/flow_factory.h"
@@ -20,7 +20,7 @@ using namespace humanus;
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
 static void sigint_handler(int signo) {
     if (signo == SIGINT) {
-        logger->info("Received SIGINT, exiting...");
+        logger->info("Interrupted by user\n");
         exit(0);
     }
 }
@@ -44,58 +44,62 @@ int main() {
 #endif
     }
 
-    Manus agent = Manus();
-    while (true) {
-        if (agent.current_step == agent.max_steps) {
-            std::cout << "Program automatically paused after " << agent.max_steps << " steps." << std::endl;
-            std::cout << "Enter your prompt (enter empty line to resume or 'exit' to quit): ";
-            agent.current_step = 0;
-        } else {
-            std::cout << "Enter your prompt (or 'exit' to quit): ";
-        }
-        std::string prompt;
-        std::getline(std::cin, prompt);
-        if (prompt == "exit") {
-            logger->info("Goodbye!");
-            break;
-        }
-        logger->info("Processing your request...");
-        agent.run(prompt);
-    }
-
-    // std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Manus>();
-    // std::map<std::string, std::shared_ptr<BaseAgent>> agents;
-    // agents["default"] = agent_ptr;
-
-    // auto flow = FlowFactory::create_flow(
-    //     FlowType::PLANNING,
-    //     nullptr, // llm
-    //     nullptr, // planning_tool
-    //     std::vector<std::string>{}, // executor_keys
-    //     "", // active_plan_id
-    //     agents, // agents
-    //     std::vector<std::shared_ptr<BaseTool>>{}, // tools
-    //     "default" // primary_agent_key
-    // );
-
+    // Humanus agent = Humanus();
     // while (true) {
-    //     if (agent_ptr->current_step == agent_ptr->max_steps) {
-    //         std::cout << "Program automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
-    //         std::cout << "Enter your prompt (enter empty line to resume or 'exit' to quit): ";
-    //         agent_ptr->current_step = 0;
+    //     if (agent.current_step == agent.max_steps) {
+    //         std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
+    //         std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
+    //         agent.reset(false);
     //     } else {
     //         std::cout << "Enter your prompt (or 'exit' to quit): ";
     //     }
     //     std::string prompt;
     //     std::getline(std::cin, prompt);
     //     if (prompt == "exit") {
     //         logger->info("Goodbye!");
     //         break;
     //     }
-
-    //     std::cout << "Processing your request..." << std::endl;
-    //     auto result = flow->execute(prompt);
-    //     std::cout << result << std::endl;
+    //     logger->info("Processing your request...");
+    //     agent.run(prompt);
     // }
+
+    std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Humanus>();
+    std::map<std::string, std::shared_ptr<BaseAgent>> agents;
+    agents["default"] = agent_ptr;
+
+    auto flow = FlowFactory::create_flow(
+        FlowType::PLANNING,
+        nullptr, // llm
+        nullptr, // planning_tool
+        std::vector<std::string>{}, // executor_keys
+        "", // active_plan_id
+        agents, // agents
+        std::vector<std::shared_ptr<BaseTool>>{}, // tools
+        "default" // primary_agent_key
+    );
+
+    while (true) {
+        if (agent_ptr->current_step == agent_ptr->max_steps) {
+            std::cout << "Program automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
+            std::cout << "Enter your prompt (enter empty line to resume or 'exit' to quit): ";
+            agent_ptr->reset(false);
+        } else {
+            std::cout << "Enter your prompt (or 'exit' to quit): ";
+        }
+
+        if (agent_ptr->state != AgentState::IDLE) {
+            break;
+        }
+
+        std::string prompt;
+        std::getline(std::cin, prompt);
+        if (prompt == "exit") {
+            logger->info("Goodbye!");
+            break;
+        }
+
+        std::cout << "Processing your request..." << std::endl;
+        auto result = flow->execute(prompt);
+        std::cout << result << std::endl;
+    }
 }
\ No newline at end of file
diff --git a/mcp b/mcp
index 5e9ff48..1e66845 160000
--- a/mcp
+++ b/mcp
@@ -1 +1 @@
-Subproject commit 5e9ff48b070a11ba20529feb22c68d0e9ef46f3d
+Subproject commit 1e66845e8a4306903bd7201a746fc5816cf509b7
diff --git a/prompt.cpp b/prompt.cpp
index c3f3a0b..0680122 100644
--- a/prompt.cpp
+++ b/prompt.cpp
@@ -4,20 +4,20 @@ namespace humanus {
 
 namespace prompt {
 
-namespace manus {
+namespace humanus {
 
 const char* SYSTEM_PROMPT = "\
-You are OpenManus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, or web browsing, you can handle it all.";
+You are Humanus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, or web browsing, you can handle it all.";
 
-const char* NEXT_STEP_PROMPT = R"(You can interact with the computer using PythonExecute, save important content and information files through FileSaver, open browsers and retrieve information with Puppeteer.
+const char* NEXT_STEP_PROMPT = R"(You can interact with the computer using python_execute, save important content and information files through filesystem, open browsers and retrieve information with puppeteer.
 
-PythonExecute: Execute Python code to interact with the computer system, data processing, automation tasks, etc.
+- python_execute: Execute Python code to interact with the computer system, data processing, automation tasks, etc.
 
-FileSystem: Read/write files locally, such as txt, py, html, etc. Create/list/delete directories, move files/directories, search for files and get file metadata.
+- filesystem: Read/write files locally, such as txt, py, html, etc. Create/list/delete directories, move files/directories, search for files and get file metadata.
 
-Puppeteer: Open, browse, and get screenshots of web pages using Puppeteer, a headless Chrome browser.
+- puppeteer: Open, browse, and get screenshots of web pages using Puppeteer, a headless Chrome browser.
 
 Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.)";
 
-} // namespace manus
+} // namespace humanus
 
 namespace planning {
 
 const char* PLANNING_SYSTEM_PROMPT = R"(Based on the current state, what's your next step?
@@ -70,6 +70,8 @@ namespace toolcall {
 
 const char* SYSTEM_PROMPT = "You are an agent that can execute tool calls";
 
 const char* NEXT_STEP_PROMPT = "If you want to stop interaction, use `terminate` tool/function call.";
+
+const char* TOOL_HINT_TEMPLATE = "Available tools:\n{tool_list}\n\nFor each tool call, return a json object with tool name and arguments within {tool_start}{tool_end} XML tags:\n{tool_start}\n{\"name\": <tool-name>, \"arguments\": <args-json-object>}\n{tool_end}";
 
 } // namespace toolcall
 
 } // namespace prompt
diff --git a/prompt.h b/prompt.h
index da2bc3a..13bc567 100644
--- a/prompt.h
+++ b/prompt.h
@@ -5,10 +5,10 @@ namespace humanus {
 
 namespace prompt {
 
-namespace manus {
+namespace humanus {
 extern const char* SYSTEM_PROMPT;
 extern const char* NEXT_STEP_PROMPT;
-} // namespace manus
+} // namespace humanus
 
 namespace planning {
 extern const char* PLANNING_SYSTEM_PROMPT;
@@ -23,19 +23,11 @@ extern const char* NEXT_STEP_TEMPLATE;
 namespace toolcall {
 extern const char* SYSTEM_PROMPT;
 extern const char* NEXT_STEP_PROMPT;
+extern const char* TOOL_HINT_TEMPLATE;
 } // namespace toolcall
 
 } // namespace prompt
 
-// 使用内联函数来获取常量
-inline const char* get_tool_call_required() { return "required"; }
-inline const char* get_terminate_description() { return "Terminate the current interaction"; }
-inline const char* get_planning_tool_description() { return "Create a plan for the given task"; }
-
-#define TOOL_CALL_REQUIRED get_tool_call_required()
-#define _TERMINATE_DESCRIPTION get_terminate_description()
-#define _PLANNING_TOOL_DESCRIPTION get_planning_tool_description()
-
 } // namespace humanus
 
 #endif // HUMANUS_PROMPT_H
diff --git a/schema.h b/schema.h
index 585f66c..87370a5 100644
--- a/schema.h
+++ b/schema.h
@@ -177,7 +177,8 @@ struct Memory {
     // Add a message to the memory
     void add_message(const Message& message) {
         messages.push_back(message);
-        if (messages.size() > max_messages) {
+        while (!messages.empty() && (messages.size() > max_messages || messages.begin()->role == "assistant" || messages.begin()->role == "tool")) {
+            // Ensure the first message is always a user or system message
             messages.erase(messages.begin());
         }
     }
diff --git a/server/mcp_server_main.cpp b/server/mcp_server_main.cpp
index b79254d..2411bf2 100644
--- a/server/mcp_server_main.cpp
+++ b/server/mcp_server_main.cpp
@@ -1,14 +1,14 @@
 /**
  * @file mcp_server_main.cpp
- * @brief OpenManus MCP Server Implementation
+ * @brief Humanus MCP Server Implementation
  *
- * This file implements the OpenManus MCP server that provides tool invocation functionality.
+ * This file implements the Humanus MCP server that provides tool invocation functionality.
  * Currently implements the PythonExecute tool.
  */
 
-#include "../mcp/include/mcp_server.h"
-#include "../mcp/include/mcp_tool.h"
-#include "../mcp/include/mcp_resource.h"
+#include "mcp_server.h"
+#include "mcp_tool.h"
+#include "mcp_resource.h"
 
 #include 
 #include 
@@ -21,7 +21,7 @@ extern void register_python_execute_tool(mcp::server& server);
 int main() {
     // Create and configure server
     mcp::server server("localhost", 8818);
-    server.set_server_info("OpenManusMCPServer", "0.0.1");
+    server.set_server_info("HumanusMCPServer", "0.0.1");
 
     // Set server capabilities
     mcp::json capabilities = {
@@ -33,7 +33,7 @@ int main() {
     register_python_execute_tool(server);
 
     // Start server
-    std::cout << "Starting OpenManus MCP server at localhost:8818..." << std::endl;
+    std::cout << "Starting Humanus MCP server at localhost:8818..." << std::endl;
     std::cout << "Press Ctrl+C to stop server" << std::endl;
 
     server.start(true); // Blocking mode
diff --git a/server/python_execute.cpp b/server/python_execute.cpp
index d5dbeb1..19fc87a 100644
--- a/server/python_execute.cpp
+++ b/server/python_execute.cpp
@@ -1,13 +1,13 @@
 /**
  * @file python_execute.cpp
- * @brief OpenManus Python execution tool implementation
+ * @brief Python execution tool implementation
  *
- * This file implements the OpenManus Python execution tool, using Python.h to directly call the Python interpreter.
+ * This file implements the Python execution tool, using Python.h to directly call the Python interpreter.
  */
 
-#include "mcp/include/mcp_server.h"
-#include "mcp/include/mcp_tool.h"
-#include "mcp/include/mcp_resource.h"
+#include "mcp_server.h"
+#include "mcp_tool.h"
+#include "mcp_resource.h"
 
 #include 
 #include 
@@ -40,9 +40,6 @@ public:
         Py_Initialize();
         if (Py_IsInitialized()) {
             is_initialized = true;
-            // Initialize thread support
-            PyEval_InitThreads();
-
             // Release GIL to allow other threads to acquire
             PyThreadState *_save = PyEval_SaveThread();
         } else {
             std::cerr << "Failed to initialize Python interpreter" << std::endl;
diff --git a/tool/base.h b/tool/base.h
index 5c8917d..47b62e6 100644
--- a/tool/base.h
+++ b/tool/base.h
@@ -2,11 +2,11 @@
 #define HUMANUS_TOOL_BASE_H
 
 #include "toml.hpp"
-#include "../schema.h"
-#include "../agent/base.h"
-#include "../mcp/include/mcp_client.h"
-#include "../mcp/include/mcp_stdio_client.h"
-#include "../mcp/include/mcp_sse_client.h"
+#include "schema.h"
+#include "agent/base.h"
+#include "mcp/include/mcp_client.h"
+#include "mcp/include/mcp_stdio_client.h"
+#include "mcp/include/mcp_sse_client.h"
 #include 
 
 namespace humanus {
@@ -164,7 +164,7 @@ struct ToolResult {
 
 // A ToolResult that represents a failure.
 struct ToolError : ToolResult {
-    ToolError(const std::string& error) : ToolResult({}, error) {}
+    ToolError(const json& error) : ToolResult({}, error) {}
 };
 
 // Execute the tool with given parameters.
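Reviewer note (not part of the patch): a minimal sketch of how the new `ToolHelper` is meant to round-trip tool calls for models without native OAI tool support, using the `hint()`/`parse()` API added in `config.h` above. The `<tool_call>` tag strings, the example model reply, and the `main()` harness are illustrative assumptions only.

```cpp
#include "config.h" // ToolHelper, as added in this patch
#include <cassert>
#include <iostream>

int main() {
    using namespace humanus;

    // Hypothetical tag strings; real values come from [tool_helper] in config.toml.
    ToolHelper helper("<tool_call>", "</tool_call>");

    // hint() fills {tool_start}/{tool_end}/{tool_list} in the template;
    // llm.h appends the result to the last user message when oai_tool_support is off.
    std::cout << helper.hint("[{\"name\": \"terminate\"}]") << std::endl;

    // parse() strips embedded tool calls out of a raw completion and returns
    // {"content": ..., "tool_calls": [...]} in the OAI response shape.
    std::string reply = "Finishing up. <tool_call>{\"name\": \"terminate\", "
                        "\"arguments\": {\"status\": \"success\"}}</tool_call>";
    json parsed = helper.parse(reply);
    assert(parsed["tool_calls"].size() == 1);
    std::cout << parsed["content"] << std::endl; // residual content only
}
```

Together with `dump()` (assistant tool calls back into tagged text) this keeps the rest of the agent loop unaware of whether the backing model speaks OAI tool calls natively.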
diff --git a/tool/filesystem.h b/tool/filesystem.h
index f61beb1..b7c0996 100644
--- a/tool/filesystem.h
+++ b/tool/filesystem.h
@@ -5,15 +5,15 @@
 
 namespace humanus {
 
-struct FileSystem : BaseTool {
+struct Filesystem : BaseTool {
     inline static const std::string name_ = "filesystem";
-    inline static const std::string description_ = "## Features\n\n- Read/write files\n- Create/list/delete directories\n- Move files/directories\n- Search files\n- Get file metadata\n\n**Note**: The server will only allow operations within directories specified via `args`.";
+    inline static const std::string description_ = "## Features\n\n- Read/write files\n- Create/list/delete directories\n- Move files/directories\n- Search files\n- Get file metadata";
 
     inline static const json parameters_ = json::parse(R"json({
         "type": "object",
         "properties": {
-            "tool": {
+            "command": {
                 "type": "string",
-                "description": "### Tools\n\n- **read_file**\n - Read complete contents of a file\n - Input: `path` (string)\n - Reads complete file contents with UTF-8 encoding\n\n- **read_multiple_files**\n - Read multiple files simultaneously\n - Input: `paths` (string[])\n - Failed reads won't stop the entire operation\n\n- **write_file**\n - Create new file or overwrite existing (exercise caution with this)\n - Inputs:\n - `path` (string): File location\n - `content` (string): File content\n\n- **edit_file**\n - Make selective edits using advanced pattern matching and formatting\n - Features:\n - Line-based and multi-line content matching\n - Whitespace normalization with indentation preservation\n - Fuzzy matching with confidence scoring\n - Multiple simultaneous edits with correct positioning\n - Indentation style detection and preservation\n - Git-style diff output with context\n - Preview changes with dry run mode\n - Failed match debugging with confidence scores\n - Inputs:\n - `path` (string): File to edit\n - `edits` (array): List of edit operations\n - `oldText` (string): Text to search for (can be substring)\n - `newText` (string): Text to replace with\n - `dryRun` (boolean): Preview changes without applying (default: false)\n - `options` (object): Optional formatting settings\n - `preserveIndentation` (boolean): Keep existing indentation (default: true)\n - `normalizeWhitespace` (boolean): Normalize spaces while preserving structure (default: true)\n - `partialMatch` (boolean): Enable fuzzy matching (default: true)\n - Returns detailed diff and match information for dry runs, otherwise applies changes\n - Best Practice: Always use dryRun first to preview changes before applying them\n\n- **create_directory**\n - Create new directory or ensure it exists\n - Input: `path` (string)\n - Creates parent directories if needed\n - Succeeds silently if directory exists\n\n- **list_directory**\n - List directory contents with [FILE] or [DIR] prefixes\n - Input: `path` (string)\n\n- **move_file**\n - Move or rename files and directories\n - Inputs:\n - `source` (string)\n - `destination` (string)\n - Fails if destination exists\n\n- **search_files**\n - Recursively search for files/directories\n - Inputs:\n - `path` (string): Starting directory\n - `pattern` (string): Search pattern\n - `excludePatterns` (string[]): Exclude any patterns. Glob formats are supported.\n - Case-insensitive matching\n - Returns full paths to matches\n\n- **get_file_info**\n - Get detailed file/directory metadata\n - Input: `path` (string)\n - Returns:\n - Size\n - Creation time\n - Modified time\n - Access time\n - Type (file/directory)\n - Permissions\n\n- **list_allowed_directories**\n - List all directories the server is allowed to access\n - No input required\n - Returns:\n - Directories that this server can read/write from",
+                "description": "### Commands\n\n- **read_file**\n - Read complete contents of a file\n - Input: `path` (string)\n - Reads complete file contents with UTF-8 encoding\n\n- **read_multiple_files**\n - Read multiple files simultaneously\n - Input: `paths` (string[])\n - Failed reads won't stop the entire operation\n\n- **write_file**\n - Create new file or overwrite existing (exercise caution with this)\n - Inputs:\n - `path` (string): File location\n - `content` (string): File content\n\n- **edit_file**\n - Make selective edits using advanced pattern matching and formatting\n - Features:\n - Line-based and multi-line content matching\n - Whitespace normalization with indentation preservation\n - Fuzzy matching with confidence scoring\n - Multiple simultaneous edits with correct positioning\n - Indentation style detection and preservation\n - Git-style diff output with context\n - Preview changes with dry run mode\n - Failed match debugging with confidence scores\n - Inputs:\n - `path` (string): File to edit\n - `edits` (array): List of edit operations\n - `oldText` (string): Text to search for (can be substring)\n - `newText` (string): Text to replace with\n - `dryRun` (boolean): Preview changes without applying (default: false)\n - `options` (object): Optional formatting settings\n - `preserveIndentation` (boolean): Keep existing indentation (default: true)\n - `normalizeWhitespace` (boolean): Normalize spaces while preserving structure (default: true)\n - `partialMatch` (boolean): Enable fuzzy matching (default: true)\n - Returns detailed diff and match information for dry runs, otherwise applies changes\n - Best Practice: Always use dryRun first to preview changes before applying them\n\n- **create_directory**\n - Create new directory or ensure it exists\n - Input: `path` (string)\n - Creates parent directories if needed\n - Succeeds silently if directory exists\n\n- **list_directory**\n - List directory contents with [FILE] or [DIR] prefixes\n - Input: `path` (string)\n\n- **move_file**\n - Move or rename files and directories\n - Inputs:\n - `source` (string)\n - `destination` (string)\n - Fails if destination exists\n\n- **search_files**\n - Recursively search for files/directories\n - Inputs:\n - `path` (string): Starting directory\n - `pattern` (string): Search pattern\n - `excludePatterns` (string[]): Exclude any patterns. Glob formats are supported.\n - Case-insensitive matching\n - Returns full paths to matches\n\n- **get_file_info**\n - Get detailed file/directory metadata\n - Input: `path` (string)\n - Returns:\n - Size\n - Creation time\n - Modified time\n - Access time\n - Type (file/directory)\n - Permissions\n\n- **list_allowed_directories**\n - List all directories the server is allowed to access\n - No input required\n - Returns:\n - Directories that this server can read/write from",
                 "enum": [
                     "read_file",
                     "read_multiple_files",
@@ -29,7 +29,7 @@
             },
             "path": {
                 "type": "string",
-                "description": "The path to the file or directory to operate on. Only works within allowed directories. Required by all tools except `read_multiple_files`, `move_file` and `list_allowed_directories`."
+                "description": "The path to the file or directory to operate on. Only works within allowed directories. Required by all commands except `read_multiple_files`, `move_file` and `list_allowed_directories`."
             },
             "paths": {
                 "type": "array",
@@ -46,6 +46,28 @@
                 "type": "array",
                 "description": "Each edit replaces exact line sequences with new content. Required by `edit_file`."
             },
+            "dryRun": {
+                "type": "boolean",
+                "description": "Preview changes without applying. Default: false. Required by `edit_file`."
+            },
+            "options": {
+                "type": "object",
+                "description": "Optional formatting settings. Required by `edit_file`.",
+                "properties": {
+                    "preserveIndentation": {
+                        "type": "boolean",
+                        "description": "Keep existing indentation. Default: true. Required by `edit_file`."
+                    },
+                    "normalizeWhitespace": {
+                        "type": "boolean",
+                        "description": "Normalize spaces while preserving structure. Default: true. Required by `edit_file`."
+                    },
+                    "partialMatch": {
+                        "type": "boolean",
+                        "description": "Enable fuzzy matching. Default: true. Required by `edit_file`."
+                    }
+                }
+            },
             "source": {
                 "type": "string",
                 "description": "The source path to move or rename. Required by `move_file`."
@@ -57,12 +79,19 @@
             "pattern": {
                 "type": "string",
                 "description": "The pattern to search for. Required by `search_files`."
+            },
+            "excludePatterns": {
+                "type": "array",
+                "description": "An array of patterns to exclude from the search. Glob formats are supported. Required by `search_files`.",
+                "items": {
+                    "type": "string"
+                }
             }
         },
-        "required": ["tool"]
     })json");
 
-    inline static std::set<std::string> allowed_tools = {
+        "required": ["command"]
+    inline static std::set<std::string> allowed_commands = {
         "read_file",
         "read_multiple_files",
         "write_file",
         "edit_file",
         "create_directory",
         "list_directory",
         "move_file",
         "search_files",
         "get_file_info",
         "list_allowed_directories"
     };
 
@@ -75,7 +104,7 @@
-    FileSystem() : BaseTool(name_, description_, parameters_) {}
+    Filesystem() : BaseTool(name_, description_, parameters_) {}
 
     ToolResult execute(const json& args) override {
         try {
             if (!_client) {
                 return ToolError("Failed to initialize shell client");
             }
 
-            std::string tool;
-            if (args.contains("tool")) {
-                if (args["tool"].is_string()) {
-                    tool = args["tool"].get<std::string>();
+            std::string command;
+            if (args.contains("command")) {
+                if (args["command"].is_string()) {
+                    command = args["command"].get<std::string>();
                 } else {
-                    return ToolError("Invalid tool format");
+                    return ToolError("Invalid command format");
                 }
             } else {
-                return ToolError("Tool is required");
+                return ToolError("'command' is required");
             }
 
-            if (allowed_tools.find(tool) == allowed_tools.end()) {
-                return ToolError("Unknown tool '" + tool + "'. Please use one of the following tools: " +
-                    std::accumulate(allowed_tools.begin(), allowed_tools.end(), std::string(),
+            if (allowed_commands.find(command) == allowed_commands.end()) {
+                return ToolError("Unknown command '" + command + "'. Please use one of the following commands: " +
+                    std::accumulate(allowed_commands.begin(), allowed_commands.end(), std::string(),
                     [](const std::string& a, const std::string& b) {
                         return a + (a.empty() ? "" : ", ") + b;
                     }));
             }
 
-            json result = _client->call_tool(tool, args);
+            json result = _client->call_tool(command, args);
 
             bool is_error = result.value("isError", false);
diff --git a/tool/planning.h b/tool/planning.h
index a98dd24..c8de1af 100644
--- a/tool/planning.h
+++ b/tool/planning.h
@@ -2,13 +2,13 @@
 #define HUMANUS_TOOL_PLANNING_H
 
 #include "base.h"
-#include "../prompt.h"
+#include "prompt.h"
 
 namespace humanus {
 
 struct PlanningTool : BaseTool {
     inline static const std::string name_ = "planning";
-    inline static const std::string description_ = _PLANNING_TOOL_DESCRIPTION;
+    inline static const std::string description_ = "Plan and track your tasks.";
 
     inline static const json parameters_ = json::parse(R"json({
         "type": "object",
         "properties": {
diff --git a/tool/puppeteer.h b/tool/puppeteer.h
index b959012..6629519 100644
--- a/tool/puppeteer.h
+++ b/tool/puppeteer.h
@@ -11,9 +11,9 @@ struct Puppeteer : BaseTool {
     inline static const json parameters_ = json::parse(R"json({
         "type": "object",
         "properties": {
-            "tool": {
+            "command": {
                 "type": "string",
-                "description": "### Tools\n\n- **navigate**\n - Navigate to any URL in the browser\n - Input: `url` (string)\n\n- **screenshot**\n - Capture screenshots of the entire page or specific elements\n - Inputs:\n - `name` (string, required): Name for the screenshot\n - `selector` (string, optional): CSS selector for element to screenshot\n - `width` (number, optional, default: 800): Screenshot width\n - `height` (number, optional, default: 600): Screenshot height\n\n- **click**\n - Click elements on the page\n - Input: `selector` (string): CSS selector for element to click\n\n- **hover**\n - Hover elements on the page\n - Input: `selector` (string): CSS selector for element to hover\n\n- **fill**\n - Fill out input fields\n - Inputs:\n - `selector` (string): CSS selector for input field\n - `value` (string): Value to fill\n\n- **select**\n - Select an element with SELECT tag\n - Inputs:\n - `selector` (string): CSS selector for element to select\n - `value` (string): Value to select\n\n- **evaluate**\n - Execute JavaScript in the browser console\n - Input: `script` (string): JavaScript code to execute",
+                "description": "### Commands\n\n- **navigate**\n - Navigate to any URL in the browser\n - Input: `url` (string)\n\n- **screenshot**\n - Capture screenshots of the entire page or specific elements\n - Inputs:\n - `name` (string, required): Name for the screenshot\n - `selector` (string, optional): CSS selector for element to screenshot\n - `width` (number, optional, default: 800): Screenshot width\n - `height` (number, optional, default: 600): Screenshot height\n\n- **click**\n - Click elements on the page\n - Input: `selector` (string): CSS selector for element to click\n\n- **hover**\n - Hover elements on the page\n - Input: `selector` (string): CSS selector for element to hover\n\n- **fill**\n - Fill out input fields\n - Inputs:\n - `selector` (string): CSS selector for input field\n - `value` (string): Value to fill\n\n- **select**\n - Select an element with SELECT tag\n - Inputs:\n - `selector` (string): CSS selector for element to select\n - `value` (string): Value to select\n\n- **evaluate**\n - Execute JavaScript in the browser console\n - Input: `script` (string): JavaScript code to execute",
                 "enum": [
                     "navigate",
                     "screenshot",
@@ -55,10 +55,10 @@ struct Puppeteer : BaseTool {
                 "description": "The JavaScript code to execute. Required by `evaluate`."
             }
         },
-        "required": ["tool"]
+        "required": ["command"]
     })json");
 
-    inline static std::set<std::string> allowed_tools = {
+    inline static std::set<std::string> allowed_commands = {
         "navigate",
         "screenshot",
         "click",
@@ -76,27 +76,41 @@ struct Puppeteer : BaseTool {
             return ToolError("Failed to initialize puppeteer client");
         }
 
-        std::string tool;
-        if (args.contains("tool")) {
-            if (args["tool"].is_string()) {
-                tool = args["tool"].get<std::string>();
+        std::string command;
+        if (args.contains("command")) {
+            if (args["command"].is_string()) {
+                command = args["command"].get<std::string>();
             } else {
-                return ToolError("Invalid tool format");
+                return ToolError("Invalid command format");
             }
         } else {
-            return ToolError("Tool is required");
+            return ToolError("'command' is required");
         }
 
-        if (allowed_tools.find(tool) == allowed_tools.end()) {
-            return ToolError("Unknown tool '" + tool + "'. Please use one of the following tools: " +
-                std::accumulate(allowed_tools.begin(), allowed_tools.end(), std::string(),
+        if (allowed_commands.find(command) == allowed_commands.end()) {
+            return ToolError("Unknown command '" + command + "'. Please use one of the following commands: " +
+                std::accumulate(allowed_commands.begin(), allowed_commands.end(), std::string(),
                 [](const std::string& a, const std::string& b) {
                     return a + (a.empty() ? "" : ", ") + b;
                 }));
         }
 
-        json result = _client->call_tool("puppeteer_" + tool, args);
+        json result = _client->call_tool("puppeteer_" + command, args);
+
+        if (result["content"].is_array()) {
+            for (size_t i = 0; i < result["content"].size(); i++) {
+                if (result["content"][i]["type"] == "image") {
+                    std::string data = result["content"][i]["data"].get<std::string>();
+                    std::string mimeType = result["content"][i].value("mimeType", "image/png");
+                    // Convert to OAI-compatible image_url format
+                    result["content"][i] = {
+                        {"type", "image_url"},
+                        {"image_url", {{"url", "data:" + mimeType + ";base64," + data}}}
+                    };
+                }
+            }
+        }
+
         bool is_error = result.value("isError", false);
 
         // Return different ToolResult based on whether there is an error
diff --git a/tool/python_execute.h b/tool/python_execute.h
index 0d6abac..25d384c 100644
--- a/tool/python_execute.h
+++ b/tool/python_execute.h
@@ -2,7 +2,7 @@
 #define HUMANUS_TOOL_PYTHON_EXECUTE_H
 
 #include "base.h"
-#include "mcp/include/mcp_client.h"
+#include "mcp_client.h"
 
 namespace humanus {
diff --git a/tool/shell.h b/tool/shell.h
index 066e6b4..e3f9f36 100644
--- a/tool/shell.h
+++ b/tool/shell.h
@@ -2,9 +2,9 @@
 #define HUMANUS_TOOL_BASH_H
 
 #include "base.h"
-#include "../mcp/include/mcp_stdio_client.h"
-#include "../toml.hpp"
-#include "../config.h"
+#include "mcp_stdio_client.h"
+#include "toml.hpp"
+#include "config.h"
 #include 
 
 namespace humanus {
diff --git a/tool/terminate.h b/tool/terminate.h
index 51ec443..f1e061d 100644
--- a/tool/terminate.h
+++ b/tool/terminate.h
@@ -1,7 +1,10 @@
 #ifndef HUMANUS_TOOL_TERMINATE_H
 #define HUMANUS_TOOL_TERMINATE_H
+
 #include "base.h"
-#include "../prompt.h"
+#include "prompt.h"
+
+namespace humanus {
 
 struct Terminate : humanus::BaseTool {
     inline static const std::string name_ = "terminate";
@@ -21,12 +24,13 @@ struct Terminate : humanus::BaseTool {
     Terminate() : BaseTool(name_, description_, parameters_) {}
 
     // Finish the current execution
-    humanus::ToolResult execute(const humanus::json& arguments) override {
-        return humanus::ToolResult{
+    ToolResult execute(const json& arguments) override {
+        return ToolResult{
             "The interaction has been completed with status: " + arguments.value("status", "unknown")
         };
     }
 };
 
+}
 
 #endif // HUMANUS_TOOL_TERMINATE_H
\ No newline at end of file
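Reviewer note (not part of the patch): a config sketch exercising the new keys that `config.cpp` now reads. The tag strings are placeholders and the API key is elided; only the keys themselves come from the parsing code in this diff.

```toml
[llm]
model = "deepseek-chat"
base_url = "https://api.deepseek.com"
end_point = "/v1/chat/completions"
api_key = "sk-..."              # use your own key
max_tokens = 8192
timeout = 60                    # read timeout in seconds (new in this patch)
oai_tool_support = false        # route tool calls through ToolHelper (new)

[tool_helper]                   # only consulted when oai_tool_support = false
tool_start = "<tool_call>"
tool_end = "</tool_call>"
# tool_hint_template defaults to prompt::toolcall::TOOL_HINT_TEMPLATE
```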