fix bug; refine planning flow

main
hkr04 2025-03-20 01:12:15 +08:00
parent f9d544a147
commit 1d87a2f4a6
17 changed files with 189 additions and 115 deletions

View File

@ -79,7 +79,7 @@ struct BaseAgent : std::enable_shared_from_this<BaseAgent> {
if (role == "user") {
memory->add_message(Message::user_message(content));
} else if (role == "assistant") {
memory->add_message(Message::assistant_message(content));
memory->add_message(Message::assistant_message(content), std::forward<Args>(args)...);
} else if (role == "system") {
memory->add_message(Message::system_message(content));
} else if (role == "tool") {
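
Aside on the hunk above: update_memory becomes a pass-through for extra arguments (e.g. a std::vector<ToolCall>), which presumably end up in the unified assistant_message(content, tool_calls) factory introduced in message.h further down. A minimal self-contained sketch of the forwarding pattern, using stand-in types rather than the repo's real Message/Memory API:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Stand-ins for the repo's types (assumptions for illustration only).
struct ToolCall { std::string name; };

struct Message {
    std::string role;
    std::string content;
    std::vector<ToolCall> tool_calls;
    // Unified factory in the style introduced by this commit:
    static Message assistant_message(const std::string& content,
                                     const std::vector<ToolCall>& tool_calls = {}) {
        return {"assistant", content, tool_calls};
    }
};

// Perfect forwarding: whatever the caller passes after the content reaches
// the factory intact, so one overload covers both call shapes.
template <typename... Args>
Message make_assistant(const std::string& content, Args&&... args) {
    return Message::assistant_message(content, std::forward<Args>(args)...);
}

int main() {
    Message plain = make_assistant("done");  // no tool calls
    Message with_calls = make_assistant("working", std::vector<ToolCall>{{"shell"}});
    std::cout << plain.role << " / " << with_calls.tool_calls.size() << " tool call(s)\n";
}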

View File

@ -9,6 +9,7 @@
#include "tool/terminate.h"
#include "tool/puppeteer.h"
#include "tool/filesystem.h"
#include "tool/shell.h"
namespace humanus {
@ -26,6 +27,7 @@ struct Humanus : ToolCallAgent {
std::make_shared<PythonExecute>(),
std::make_shared<Puppeteer>(), // for web browsing
std::make_shared<Filesystem>(),
// std::make_shared<Shell>(),
std::make_shared<Terminate>()
}
),

View File

@ -35,7 +35,7 @@ bool PlanningAgent::think() {
step_execution_tracker[latest_tool_call.id] = {
{"step_index", current_step_index},
{"tool_name", latest_tool_call.function.name},
{"status", "pending"} // Will be updated after execution
{"step_status", "pending"} // Will be updated after execution
};
}
}
@ -212,8 +212,8 @@ void PlanningAgent::create_initial_plan(const std::string& request) {
tool_calls = ToolCall::from_json_list(response["tool_calls"]);
Message assistant_msg = Message::from_tool_calls(
tool_calls, response["content"]
Message assistant_msg = Message::assistant_message(
response["content"], tool_calls
);
memory->add_message(assistant_msg);

View File

@ -16,7 +16,7 @@ bool ToolCallAgent::think() {
tool_calls = ToolCall::from_json_list(response["tool_calls"]);
// Log response info
logger->info("" + name + "'s thoughts: " + response["content"].dump());
logger->info("" + name + "'s thoughts: " + response["content"].get<std::string>());
logger->info(
"🛠️ " + name + " selected " + std::to_string(tool_calls.size()) + " tool(s) to use"
);
@ -44,9 +44,7 @@ bool ToolCallAgent::think() {
}
// Create and add assistant message
auto assistant_msg = tool_calls.size() > 0 ?
Message::from_tool_calls(tool_calls, response["content"]) :
Message::assistant_message(response["content"]);
auto assistant_msg = Message::assistant_message(response["content"], tool_calls);
memory->add_message(assistant_msg);
if (tool_choice == "required" && tool_calls.empty()) {
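
The logging change above swaps dump() for get<std::string>(). Assuming the repo's json alias is nlohmann::json, dump() serializes the value, so a string comes out wrapped in quotes with inner quotes escaped, while get<std::string>() yields the raw text. A two-line illustration:

#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    json content = "I'll use the \"shell\" tool.";
    std::cout << content.dump() << "\n";              // "I'll use the \"shell\" tool."  (quoted, escaped)
    std::cout << content.get<std::string>() << "\n";  // I'll use the "shell" tool.
}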

View File

@ -37,7 +37,7 @@ struct LLMSettings {
const std::string& base_url = "https://api.deepseek.com",
const std::string& end_point = "/v1/chat/completions",
int max_tokens = 4096,
int timeout = 60,
int timeout = 120,
double temperature = 1.0,
bool oai_tool_support = true
) : model(model), api_key(api_key), base_url(base_url), end_point(end_point),

View File

@ -18,4 +18,5 @@ args = ["-y",
[shell]
type = "stdio"
command = "uvx mcp-shell-server"
command = "npx"
args = ["-y", "@kevinwatt/shell-mcp"]

View File

@ -47,7 +47,6 @@ std::string PlanningFlow::execute(const std::string& input) {
// Exit if no more steps or plan completed
if (current_step_index < 0) {
result += _finalize_plan();
break;
}
@ -55,7 +54,7 @@ std::string PlanningFlow::execute(const std::string& input) {
std::string step_type = step_info.value("type", "");
auto executor = get_executor(step_type);
std::string step_result = _execute_step(executor, step_info);
result += step_result + "\n";
// result += step_result + "\n";
// Check if agent wants to terminate
if (executor->state == AgentState::FINISHED || executor->state == AgentState::ERR) {
@ -63,9 +62,18 @@ std::string PlanningFlow::execute(const std::string& input) {
}
// Refactor memory
std::string prefix_sum = _summarize_plan(executor->memory->messages);
executor->reset(true); // TODO: More fine-grained memory reset?
executor->update_memory("assistant", prefix_sum);
if (!input.empty()) {
executor->update_memory("user", "Continue to accomplish the task: " + input);
}
result += step_info.value("type", "Step " + std::to_string(current_step_index)) + ":\n" + prefix_sum + "\n\n";
}
reset(true); // Clear memory and state for next plan
return result;
} catch (const std::exception& e) {
LOG_ERROR("Error executing planning flow: " + std::string(e.what()));
@ -135,7 +143,7 @@ void PlanningFlow::_create_initial_plan(const std::string& request) {
}
// Parse the current plan to identify the first non-completed step's index and info.
// Returns (None, None) if no active step is found.
// Returns (-1, None) if no active step is found.
void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {
if (active_plan_id.empty() || planning_tool->plans.find(active_plan_id) == planning_tool->plans.end()) {
logger->error("Plan with ID " + active_plan_id + " not found");
@ -146,7 +154,7 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_in
try {
// Direct access to plan data from planning tool storage
json& plan_data = planning_tool->plans[active_plan_id];
const json& plan_data = planning_tool->plans[active_plan_id];
json steps = plan_data.value("steps", json::array());
json step_statuses = plan_data.value("step_statuses", json::array());
@ -155,10 +163,10 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_in
const auto& step = steps[i].get<std::string>();
std::string step_status;
if (i >= step_status.size()) {
if (i >= step_statuses.size()) {
step_status = "not_started";
} else {
step_status = step_status[i];
step_status = step_statuses[i].get<std::string>();
}
if (step_status == "not_started" || step_status == "in_progress") {
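
Worth noting why the old code compiled at all: step_status is a std::string, so step_status.size() and step_status[i] are valid expressions. The empty local's size is always 0, every index check succeeded, and every step therefore looked "not_started". A standalone reproduction of the old vs. fixed lookup:

#include <iostream>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> step_statuses = {"completed", "in_progress"};
    std::string step_status;  // empty, like the old local variable

    for (size_t i = 0; i < step_statuses.size(); ++i) {
        // Old (buggy) check: compares against the empty string's size (0),
        // so every step appears not started:
        std::string old_lookup = (i >= step_status.size()) ? "not_started" : "?";
        // Fixed check: consults the actual status array:
        std::string new_lookup = (i >= step_statuses.size()) ? "not_started" : step_statuses[i];
        std::cout << i << ": old=" << old_lookup << " new=" << new_lookup << "\n";
    }
}
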
@ -166,6 +174,8 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_in
step_info = {
{"type", step}
};
} else { // completed or skipped
continue;
}
// Try to extract step type from the text (e.g., [SEARCH] or [CODE])
@ -177,12 +187,16 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_in
// Mark current step as in_progress
try {
planning_tool->execute({
ToolResult result = planning_tool->execute({
{"command", "mark_step"},
{"plan_id", active_plan_id},
{"step_index", i},
{"status", "in_progress"}
{"step_status", "in_progress"}
});
logger->info(
"Started executing step " + std::to_string(i) + " in plan " + active_plan_id
+ "\n\n" + result.to_string() + "\n\n"
);
} catch (const std::exception& e) {
logger->error("Error marking step as in_progress: " + std::string(e.what()));
// Update step status directly if needed
@ -195,7 +209,7 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_in
step_statuses.push_back("in_progress");
}
plan_data["step_statuses"] = step_statuses;
planning_tool->plans[active_plan_id]["step_statuses"] = step_statuses;
}
current_step_index = i;
@ -262,7 +276,7 @@ void PlanningFlow::_mark_step_completed() {
logger->warn("Failed to update plan status: " + std::string(e.what()));
// Update step status directly in planning tool storage
if (planning_tool->plans.find(active_plan_id) != planning_tool->plans.end()) {
json& plan_data = planning_tool->plans[active_plan_id];
const json& plan_data = planning_tool->plans[active_plan_id];
json step_statuses = plan_data.value("step_statuses", json::array());
// Ensure the step_statuses list is long enough
@ -272,7 +286,7 @@ void PlanningFlow::_mark_step_completed() {
// Update the status
step_statuses[current_step_index] = "completed";
plan_data["step_statuses"] = step_statuses;
planning_tool->plans[active_plan_id]["step_statuses"] = step_statuses;
}
}
}
@ -299,7 +313,7 @@ std::string PlanningFlow::_generate_plan_text_from_storage() {
return "Error: Plan with ID " + active_plan_id + " not found";
}
json& plan_data = planning_tool->plans[active_plan_id];
const json& plan_data = planning_tool->plans[active_plan_id];
auto title = plan_data.value("title", "Untitled Plan");
auto steps = plan_data.value("steps", json::array());
auto step_statuses = plan_data.value("step_statuses", json::array());
@ -372,39 +386,33 @@ std::string PlanningFlow::_generate_plan_text_from_storage() {
}
}
// Finalize the plan and provide a summary using the flow's LLM directly
std::string PlanningFlow::_finalize_plan() {
// Summarize the plan using the flow's LLM directly
std::string PlanningFlow::_summarize_plan(const std::vector<Message> messages) {
std::string plan_text = _get_plan_text();
std::string system_prompt = "You are a planning assistant. Your task is to summarize the current plan.";
std::string next_step_prompt = "Above is the nearest finished step in the plan. Here is the current plan status:\n\n" + plan_text + "\n\n"
+ "Please provide a summary of what was accomplished and any thoughts for next steps (when the plan is not fully finished).";
// Create a summary using the flow's LLM directly
try {
std::string system_prompt = "You are a planning assistant. Your task is to summarize the completed plan.";
Message user_message = Message::user_message(
"The plan has been completed. Here is the final plan status:\n\n" + plan_text + "\n\n" +
"Please provide a summary of what was accomplished and any final thoughts."
);
auto response = llm->ask(
{user_message},
system_prompt
messages,
system_prompt,
next_step_prompt
);
return response;
} catch (const std::exception& e) {
LOG_ERROR("Error finalizing plan with LLM: " + std::string(e.what()));
LOG_ERROR("Error summarizing plan with LLM: " + std::string(e.what()));
// Fallback to using an agent for the summary
try {
auto agent = primary_agent();
std::string summary_prompt = "\nThe plan has been completed. Here is the final plan status:\n\n";
summary_prompt += plan_text + "\n\n";
summary_prompt += "Please provide a summary of what was accomplished and any final thoughts.\n";
std::string summary = agent->run(summary_prompt);
return "Plan completed:\n\n" + summary;
std::string summary = agent->run(system_prompt + next_step_prompt);
return summary;
} catch (const std::exception& e2) {
LOG_ERROR("Error finalizing plan with agent: " + std::string(e2.what()));
return "Plan completed. Error generating summary.";
LOG_ERROR("Error summarizing plan with agent: " + std::string(e2.what()));
return "Error generating summary.";
}
}
}

View File

@ -74,8 +74,17 @@ struct PlanningFlow : public BaseFlow {
// Generate plan text directly from storage if the planning tool fails.
std::string _generate_plan_text_from_storage();
// Finalize the plan and provide a summary using the flow's LLM directly
std::string _finalize_plan();
// Summarize the plan using the flow's LLM directly
std::string _summarize_plan(const std::vector<Message> messages);
// Reset the flow to its initial state.
void reset(bool reset_memory = true) {
active_plan_id = "plan_" + std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
current_step_index = -1;
for (const auto& [key, agent] : agents) {
agent->reset(reset_memory);
}
}
};
}
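
The new reset() cascades into every registered agent and mints a fresh plan id from the system clock's epoch tick count. A self-contained sketch of just the id scheme (tick granularity is platform-dependent):

#include <chrono>
#include <iostream>
#include <string>

int main() {
    // Same scheme as PlanningFlow::reset(): one id per reset, unique as long
    // as two resets don't land on the same clock tick.
    auto ticks = std::chrono::system_clock::now().time_since_epoch().count();
    std::string active_plan_id = "plan_" + std::to_string(ticks);
    std::cout << active_plan_id << "\n";  // e.g. plan_1742404335123456789
}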

llm.h
View File

@ -124,6 +124,9 @@ public:
formatted_messages[++j] = formatted_messages[i];
} else {
formatted_messages[j]["content"] = concat_content(formatted_messages[j]["content"], formatted_messages[i]["content"]);
if (!formatted_messages[i]["tool_calls"].empty()) {
formatted_messages[j]["tool_calls"] = concat_content(formatted_messages[j]["tool_calls"], formatted_messages[i]["tool_calls"]);
}
}
}
@ -136,6 +139,7 @@ public:
* @brief Send a request to the LLM and get the reply
* @param messages The conversation message list
* @param system_prompt Optional system message
* @param next_step_prompt Optional prompt message for the next step
* @param max_retries The maximum number of retries
* @return The generated assistant content
* @throws std::invalid_argument If the message is invalid or the reply is empty
@ -144,6 +148,7 @@ public:
std::string ask(
const std::vector<Message>& messages,
const std::string& system_prompt = "",
const std::string& next_step_prompt = "",
int max_retries = 3
) {
json formatted_messages = json::array();
@ -158,6 +163,24 @@ public:
json _formatted_messages = format_messages(messages);
formatted_messages.insert(formatted_messages.end(), _formatted_messages.begin(), _formatted_messages.end());
if (!next_step_prompt.empty()) {
if (formatted_messages.empty() || formatted_messages.back()["role"] != "user") {
formatted_messages.push_back({
{"role", "user"},
{"content", next_step_prompt}
});
} else {
if (formatted_messages.back()["content"].is_string()) {
formatted_messages.back()["content"] = formatted_messages.back()["content"].get<std::string>() + "\n\n" + next_step_prompt;
} else if (formatted_messages.back()["content"].is_array()) {
formatted_messages.back()["content"].push_back({
{"type", "text"},
{"text", next_step_prompt}
});
}
}
}
json body = {
{"model", llm_config_->model},
{"messages", formatted_messages},
@ -333,6 +356,19 @@ public:
logger->info("Retrying " + std::to_string(retry) + "/" + std::to_string(max_retries));
}
// If the logger has a file sink, log the request body
if (logger->sinks().size() > 1) {
auto file_sink = std::dynamic_pointer_cast<spdlog::sinks::basic_file_sink_mt>(logger->sinks()[1]);
if (file_sink) {
file_sink->log(spdlog::details::log_msg(
spdlog::source_loc{},
logger->name(),
spdlog::level::debug,
"Failed to get response from LLM. Full request body: " + body_str
));
}
}
throw std::runtime_error("Failed to get response from LLM");
}
};
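
The next_step_prompt handling above has a small, testable core: append to the last user message when there is one (whether its content is a string or a multipart array), otherwise push a new user message. A standalone mirror of that merge rule, extracted here for illustration with nlohmann::json:

#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

// Mirror of the merge logic inside ask().
void merge_next_step(json& formatted_messages, const std::string& next_step_prompt) {
    if (formatted_messages.empty() || formatted_messages.back()["role"] != "user") {
        formatted_messages.push_back({{"role", "user"}, {"content", next_step_prompt}});
    } else if (formatted_messages.back()["content"].is_string()) {
        formatted_messages.back()["content"] =
            formatted_messages.back()["content"].get<std::string>() + "\n\n" + next_step_prompt;
    } else if (formatted_messages.back()["content"].is_array()) {
        formatted_messages.back()["content"].push_back({{"type", "text"}, {"text", next_step_prompt}});
    }
}

int main() {
    json msgs = json::array({ {{"role", "user"}, {"content", "Summarize the plan."}} });
    merge_next_step(msgs, "Here is the current plan status: ...");
    std::cout << msgs.dump(2) << "\n";  // one user message carrying both parts
}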

View File

@ -44,62 +44,62 @@ int main() {
#endif
}
// Humanus agent = Humanus();
// while (true) {
// if (agent.current_step == agent.max_steps) {
// std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
// std::cout << "Enter your prompt (enter en empty line to resume or 'exit' to quit): ";
// agent.reset(false);
// } else {
// std::cout << "Enter your prompt (or 'exit' to quit): ";
// }
// std::string prompt;
// std::getline(std::cin, prompt);
// if (prompt == "exit") {
// logger->info("Goodbye!");
// break;
// }
// logger->info("Processing your request...");
// agent.run(prompt);
// }
std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Humanus>();
std::map<std::string, std::shared_ptr<BaseAgent>> agents;
agents["default"] = agent_ptr;
auto flow = FlowFactory::create_flow(
FlowType::PLANNING,
nullptr, // llm
nullptr, // planning_tool
std::vector<std::string>{}, // executor_keys
"", // active_plan_id
agents, // agents
std::vector<std::shared_ptr<BaseTool>>{}, // tools
"default" // primary_agent_key
);
Humanus agent = Humanus();
while (true) {
if (agent_ptr->current_step == agent_ptr->max_steps) {
std::cout << "Program automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
std::cout << "Enter your prompt (enter empty line to resume or 'exit' to quit): ";
agent_ptr->reset(false);
if (agent.current_step == agent.max_steps) {
std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
agent.reset(false);
} else {
std::cout << "Enter your prompt (or 'exit' to quit): ";
}
if (agent_ptr->state != AgentState::IDLE) {
break;
}
std::string prompt;
std::getline(std::cin, prompt);
if (prompt == "exit") {
logger->info("Goodbye!");
break;
}
logger->info("Processing your request...");
agent.run(prompt);
}
std::cout << "Processing your request..." << std::endl;
auto result = flow->execute(prompt);
std::cout << result << std::endl;
}
// std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Humanus>();
// std::map<std::string, std::shared_ptr<BaseAgent>> agents;
// agents["default"] = agent_ptr;
// auto flow = FlowFactory::create_flow(
// FlowType::PLANNING,
// nullptr, // llm
// nullptr, // planning_tool
// std::vector<std::string>{}, // executor_keys
// "", // active_plan_id
// agents, // agents
// std::vector<std::shared_ptr<BaseTool>>{}, // tools
// "default" // primary_agent_key
// );
// while (true) {
// if (agent_ptr->current_step == agent_ptr->max_steps) {
// std::cout << "Automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
// std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
// agent_ptr->reset(false);
// } else {
// std::cout << "Enter your prompt (or 'exit' to quit): ";
// }
// if (agent_ptr->state != AgentState::IDLE) {
// break;
// }
// std::string prompt;
// std::getline(std::cin, prompt);
// if (prompt == "exit") {
// logger->info("Goodbye!");
// break;
// }
// std::cout << "Processing your request..." << std::endl;
// auto result = flow->execute(prompt);
// std::cout << result << std::endl;
// }
}

mcp

@ -1 +1 @@
Subproject commit 1e66845e8a4306903bd7201a746fc5816cf509b7
Subproject commit 4536d45a9e18def8d88a41b95b4fa17bc5e574d6

View File

@ -16,7 +16,9 @@ const char* NEXT_STEP_PROMPT = R"(You can interact with the computer using pytho
- puppeteer: Open, browse, and get screenshots of web pages using Puppeteer, a headless Chrome browser.
Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.)";
Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it.
After using each tool, clearly explain the execution results and suggest the next steps. If you finish the current step, call `terminate` to switch to next step.)";
} // namespace humanus
namespace planning {

View File

@ -19,7 +19,6 @@ enum class AgentState {
ERR = 3 // Don't use ERROR
};
// Define the global map
extern std::map<AgentState, std::string> agent_state_map;
struct Function {
@ -148,22 +147,11 @@ struct Message {
return Message("system", content);
}
static Message assistant_message(const json& content) {
return Message("assistant", content);
}
static Message tool_message(const json& content, const std::string& tool_call_id = "", const std::string& name = "") {
return Message("tool", content, name, tool_call_id);
}
/**
* @brief Create ToolCallsMessage from raw tool calls.
* @param tool_calls Raw tool calls from LLM
* @param content Optional message content
* @param kwargs Other optional arguments
* @return Message with tool calls
*/
static Message from_tool_calls(const std::vector<ToolCall>& tool_calls, const json& content = json::object()) {
static Message assistant_message(const json& content = "", const std::vector<ToolCall>& tool_calls = {}) {
return Message("assistant", content, "", "", tool_calls);
}
};
@ -177,7 +165,7 @@ struct Memory {
// Add a message to the memory
void add_message(const Message& message) {
messages.push_back(message);
while (!messages.empty() && messages.size() > max_messages || messages.begin()->role == "assistant" || messages.begin()->role == "tool") {
while (!messages.empty() && (messages.size() > max_messages || messages.begin()->role == "assistant" || messages.begin()->role == "tool")) {
// Ensure the first message is always a user or system message
messages.erase(messages.begin());
}
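
The added parentheses matter because && binds tighter than ||: in the old condition the !messages.empty() guard only covered the size check, so once the queue drained, the role tests would dereference begin() of an empty container. The truth-table difference in miniature (role checks stood in for by bools):

#include <iostream>

int main() {
    bool empty = true, over_cap = false, front_is_assistant = true;
    // Old grouping: (!empty && over_cap) || front_is_assistant
    bool old_cond = !empty && over_cap || front_is_assistant;    // true: would touch begin() of an empty container
    // Fixed grouping: !empty && (over_cap || front_is_assistant)
    bool new_cond = !empty && (over_cap || front_is_assistant);  // false: loop exits safely
    std::cout << old_cond << " vs " << new_cond << "\n";         // 1 vs 0
}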

View File

@ -5,6 +5,7 @@
namespace humanus {
// https://github.com/modelcontextprotocol/servers/tree/HEAD/src/filesystem
struct Filesystem : BaseTool {
inline static const std::string name_ = "filesystem";
inline static const std::string description_ = "## Features\n\n- Read/write files\n- Create/list/delete directories\n- Move files/directories\n- Search files\n- Get file metadata";

View File

@ -22,7 +22,7 @@ ToolResult PlanningTool::execute(const json& args) {
std::string title = args.value("title", "");
std::vector<std::string> steps = args.value("steps", std::vector<std::string>());
int step_index = args.value("step_index", -1);
std::string step_status = args.value("step_status", "");
std::string step_status = args.value("step_status", "not_started");
std::string step_notes = args.value("step_notes", "");
if (command == "create") {

View File

@ -5,6 +5,7 @@
namespace humanus {
// https://github.com/modelcontextprotocol/servers/tree/HEAD/src/puppeteer
struct Puppeteer : BaseTool {
inline static const std::string name_ = "puppeteer";
inline static const std::string description_ = "A Model Context Protocol server that provides browser automation capabilities using Puppeteer.";
@ -105,7 +106,7 @@ struct Puppeteer : BaseTool {
// Convert to OAI-compatible image_url format
result["content"][i] = {
{"type", "image_url"},
{"image_url", {"url", "data:" + mimeType + ";base64," + data}}
{"image_url", {{"url", "data:" + mimeType + ";base64," + data}}}
};
}
}
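
The one-character fix above is a classic nlohmann::json pitfall: {"url", data_url} is an initializer list of two plain values, i.e. a JSON array, while {{"url", data_url}} is an object with one key. OpenAI-style APIs expect "image_url" to be an object, so the old form produced ["url", "data:..."] instead of {"url": "data:..."}. Demonstrated standalone:

#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    std::string data_url = "data:image/png;base64,AAAA";
    json wrong = {{"type", "image_url"}, {"image_url", {"url", data_url}}};    // array by accident
    json right = {{"type", "image_url"}, {"image_url", {{"url", data_url}}}};  // object, as intended
    std::cout << wrong["image_url"].dump() << "\n";  // ["url","data:image/png;base64,AAAA"]
    std::cout << right["image_url"].dump() << "\n";  // {"url":"data:image/png;base64,AAAA"}
}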

View File

@ -10,6 +10,7 @@
namespace humanus {
// A tool for executing shell commands using MCP shell server
// https://github.com/kevinwatt/shell-mcp.git
struct Shell : BaseTool {
inline static const std::string name_ = "shell";
inline static const std::string description_ = "Execute a shell command in the terminal.";
@ -40,7 +41,7 @@ struct Shell : BaseTool {
"nslookup",
"ip",
"whereis"
],
]
},
"args": {
"type": "array",
@ -54,6 +55,29 @@ struct Shell : BaseTool {
"required": ["command"]
})json");
inline static std::set<std::string> allowed_commands = {
"ls",
"cat",
"pwd",
"df",
// "echo", // Not working now
"ps",
"free",
"uptime",
"date",
"grep",
"w",
"whois",
"find",
"netstat",
"lspci",
"lsusb",
"dig",
"nslookup",
"ip",
"whereis"
};
Shell() : BaseTool(name_, description_, parameters_) {}
ToolResult execute(const json& args) override {
@ -63,7 +87,6 @@ struct Shell : BaseTool {
return ToolError("Failed to initialize shell client");
}
// Handle command parameters
std::string command;
if (args.contains("command")) {
if (args["command"].is_string()) {
@ -72,14 +95,19 @@ struct Shell : BaseTool {
return ToolError("Invalid command format");
}
} else {
return ToolError("Command is required");
return ToolError("'command' is required");
}
json tool_args = args;
tool_args.erase("command");
if (allowed_commands.find(command) == allowed_commands.end()) {
return ToolError("Unknown command '" + command + "'. Please use one of the following commands: " +
std::accumulate(allowed_commands.begin(), allowed_commands.end(), std::string(),
[](const std::string& a, const std::string& b) {
return a + (a.empty() ? "" : ", ") + b;
}));
}
// Call shell tool
json result = _client->call_tool("shell_" + command, tool_args);
json result = _client->call_tool("shell_" + command, args);
bool is_error = result.value("isError", false);
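
A final note on the whitelist error path: the std::accumulate fold (which requires <numeric> to be included) joins the allowed commands into a readable list, and std::set iteration delivers them pre-sorted. Standalone:

#include <iostream>
#include <numeric>  // required for the std::accumulate call in the hunk above
#include <set>
#include <string>

int main() {
    std::set<std::string> allowed_commands = {"ls", "cat", "pwd"};
    // Same fold as Shell::execute: join into "a, b, c" with no leading comma.
    std::string joined = std::accumulate(allowed_commands.begin(), allowed_commands.end(),
        std::string(),
        [](const std::string& a, const std::string& b) {
            return a + (a.empty() ? "" : ", ") + b;
        });
    std::cout << joined << "\n";  // cat, ls, pwd
}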