fix bug; refine planning flow

parent f9d544a147
commit 1d87a2f4a6
@@ -79,7 +79,7 @@ struct BaseAgent : std::enable_shared_from_this<BaseAgent> {
         if (role == "user") {
             memory->add_message(Message::user_message(content));
         } else if (role == "assistant") {
-            memory->add_message(Message::assistant_message(content));
+            memory->add_message(Message::assistant_message(content), std::forward<Args>(args)...);
         } else if (role == "system") {
             memory->add_message(Message::system_message(content));
         } else if (role == "tool") {
@@ -9,6 +9,7 @@
 #include "tool/terminate.h"
 #include "tool/puppeteer.h"
 #include "tool/filesystem.h"
+#include "tool/shell.h"

 namespace humanus {

@@ -26,6 +27,7 @@ struct Humanus : ToolCallAgent {
             std::make_shared<PythonExecute>(),
             std::make_shared<Puppeteer>(), // for web browsing
             std::make_shared<Filesystem>(),
+            // std::make_shared<Shell>(),
             std::make_shared<Terminate>()
         }
     ),
@@ -35,7 +35,7 @@ bool PlanningAgent::think() {
         step_execution_tracker[latest_tool_call.id] = {
             {"step_index", current_step_index},
             {"tool_name", latest_tool_call.function.name},
-            {"status", "pending"} // Will be updated after execution
+            {"step_status", "pending"} // Will be updated after execution
         };
     }
 }
@@ -212,8 +212,8 @@ void PlanningAgent::create_initial_plan(const std::string& request) {
     tool_calls = ToolCall::from_json_list(response["tool_calls"]);

-    Message assistant_msg = Message::from_tool_calls(
-        tool_calls, response["content"]
+    Message assistant_msg = Message::assistant_message(
+        response["content"], tool_calls
     );

     memory->add_message(assistant_msg);
@@ -16,7 +16,7 @@ bool ToolCallAgent::think() {
     tool_calls = ToolCall::from_json_list(response["tool_calls"]);

     // Log response info
-    logger->info("✨ " + name + "'s thoughts: " + response["content"].dump());
+    logger->info("✨ " + name + "'s thoughts: " + response["content"].get<std::string>());
     logger->info(
         "🛠️ " + name + " selected " + std::to_string(tool_calls.size()) + " tool(s) to use"
     );
@@ -44,9 +44,7 @@ bool ToolCallAgent::think() {
     }

     // Create and add assistant message
-    auto assistant_msg = tool_calls.size() > 0 ?
-        Message::from_tool_calls(tool_calls, response["content"]) :
-        Message::assistant_message(response["content"]);
+    auto assistant_msg = Message::assistant_message(response["content"], tool_calls);
     memory->add_message(assistant_msg);

     if (tool_choice == "required" && tool_calls.empty()) {
config.h (2 changes)
@@ -37,7 +37,7 @@ struct LLMSettings {
         const std::string& base_url = "https://api.deepseek.com",
         const std::string& end_point = "/v1/chat/completions",
         int max_tokens = 4096,
-        int timeout = 60,
+        int timeout = 120,
         double temperature = 1.0,
         bool oai_tool_support = true
     ) : model(model), api_key(api_key), base_url(base_url), end_point(end_point),
@@ -18,4 +18,5 @@ args = ["-y",

 [shell]
 type = "stdio"
-command = "uvx mcp-shell-server"
+command = "npx"
+args = ["-y", "@kevinwatt/shell-mcp"]
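With `type = "stdio"`, the MCP client launches this server as a child process built from `command` plus `args`, so the entry now spawns `npx -y @kevinwatt/shell-mcp` (the Node shell server whose command set matches the new `allowed_commands` whitelist in tool/shell.h below) instead of the uvx-hosted `mcp-shell-server`.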
@@ -47,7 +47,6 @@ std::string PlanningFlow::execute(const std::string& input) {

             // Exit if no more steps or plan completed
             if (current_step_index < 0) {
-                result += _finalize_plan();
                 break;
             }

@@ -55,7 +54,7 @@ std::string PlanningFlow::execute(const std::string& input) {
             std::string step_type = step_info.value("type", "");
             auto executor = get_executor(step_type);
             std::string step_result = _execute_step(executor, step_info);
-            result += step_result + "\n";
+            // result += step_result + "\n";

             // Check if agent wants to terminate
             if (executor->state == AgentState::FINISHED || executor->state == AgentState::ERR) {
@@ -63,9 +62,18 @@ std::string PlanningFlow::execute(const std::string& input) {
             }

+            // Refactor memory
+            std::string prefix_sum = _summarize_plan(executor->memory->messages);
+            executor->reset(true); // TODO: More fine-grained memory reset?
+            executor->update_memory("assistant", prefix_sum);
+            if (!input.empty()) {
+                executor->update_memory("user", "Continue to accomplish the task: " + input);
+            }
+
+            result += step_info.value("type", "Step " + std::to_string(current_step_index)) + ":\n" + prefix_sum + "\n\n";
         }

         reset(true); // Clear memory and state for next plan

         return result;
     } catch (const std::exception& e) {
         LOG_ERROR("Error executing planning flow: " + std::string(e.what()));
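This block is the core of the refined planning flow: after a step's executor finishes, its whole message history is condensed by `_summarize_plan`, the executor is reset, and its memory is re-seeded with that summary as an assistant message (plus a reminder of the original task), so per-step context stays bounded; the raw `step_result` is dropped from `result` in favor of the per-step summary.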
@@ -135,7 +143,7 @@ void PlanningFlow::_create_initial_plan(const std::string& request) {
 }

 // Parse the current plan to identify the first non-completed step's index and info.
-// Returns (None, None) if no active step is found.
+// Returns (-1, None) if no active step is found.
 void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {
     if (active_plan_id.empty() || planning_tool->plans.find(active_plan_id) == planning_tool->plans.end()) {
         logger->error("Plan with ID " + active_plan_id + " not found");
@@ -146,7 +154,7 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {

     try {
         // Direct access to plan data from planning tool storage
-        json& plan_data = planning_tool->plans[active_plan_id];
+        const json& plan_data = planning_tool->plans[active_plan_id];
         json steps = plan_data.value("steps", json::array());
         json step_statuses = plan_data.value("step_statuses", json::array());

@@ -155,10 +163,10 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {
             const auto& step = steps[i].get<std::string>();
             std::string step_status;

-            if (i >= step_status.size()) {
+            if (i >= step_statuses.size()) {
                 step_status = "not_started";
             } else {
-                step_status = step_status[i];
+                step_status = step_statuses[i].get<std::string>();
             }

             if (step_status == "not_started" || step_status == "in_progress") {
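Both changes in this hunk fix one typo: the bounds check and the lookup consulted `step_status` — the just-declared, empty `std::string` — instead of the `step_statuses` array. Since `i >= step_status.size()` was `i >= 0` and always true, every step came back as `not_started`; and had the `else` branch ever run, `step_status = step_status[i]` would merely have assigned a single `char`, which `std::string` accepts without complaint.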
@@ -166,6 +174,8 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {
                 step_info = {
                     {"type", step}
                 };
+            } else { // completed or skipped
+                continue;
             }

             // Try to extract step type from the text (e.g., [SEARCH] or [CODE])
@@ -177,12 +187,16 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {

             // Mark current step as in_progress
             try {
-                planning_tool->execute({
+                ToolResult result = planning_tool->execute({
                     {"command", "mark_step"},
                     {"plan_id", active_plan_id},
                     {"step_index", i},
-                    {"status", "in_progress"}
+                    {"step_status", "in_progress"}
                 });
+                logger->info(
+                    "Started executing step " + std::to_string(i) + " in plan " + active_plan_id
+                    + "\n\n" + result.to_string() + "\n\n"
+                );
             } catch (const std::exception& e) {
                 logger->error("Error marking step as in_progress: " + std::string(e.what()));
                 // Update step status directly if needed
@@ -195,7 +209,7 @@ void PlanningFlow::_get_current_step_info(int& current_step_index, json& step_info) {
                     step_statuses.push_back("in_progress");
                 }

-                plan_data["step_statuses"] = step_statuses;
+                planning_tool->plans[active_plan_id]["step_statuses"] = step_statuses;
             }

             current_step_index = i;
@@ -262,7 +276,7 @@ void PlanningFlow::_mark_step_completed() {
         logger->warn("Failed to update plan status: " + std::string(e.what()));
         // Update step status directly in planning tool storage
         if (planning_tool->plans.find(active_plan_id) != planning_tool->plans.end()) {
-            json& plan_data = planning_tool->plans[active_plan_id];
+            const json& plan_data = planning_tool->plans[active_plan_id];
             json step_statuses = plan_data.value("step_statuses", json::array());

             // Ensure the step_statuses list is long enough
@@ -272,7 +286,7 @@ void PlanningFlow::_mark_step_completed() {

             // Update the status
             step_statuses[current_step_index] = "completed";
-            plan_data["step_statuses"] = step_statuses;
+            planning_tool->plans[active_plan_id]["step_statuses"] = step_statuses;
         }
     }
 }
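These paired write-back edits follow from `plan_data` becoming a `const` reference (here and in `_get_current_step_info` above): reads still go through `plan_data`, but updates to `step_statuses` must now name the mutable `planning_tool->plans[active_plan_id]` entry directly.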
@@ -299,7 +313,7 @@ std::string PlanningFlow::_generate_plan_text_from_storage() {
         return "Error: Plan with ID " + active_plan_id + " not found";
     }

-    json& plan_data = planning_tool->plans[active_plan_id];
+    const json& plan_data = planning_tool->plans[active_plan_id];
     auto title = plan_data.value("title", "Untitled Plan");
     auto steps = plan_data.value("steps", json::array());
     auto step_statuses = plan_data.value("step_statuses", json::array());
@@ -372,39 +386,33 @@
     }
 }

-// Finalize the plan and provide a summary using the flow's LLM directly
-std::string PlanningFlow::_finalize_plan() {
+// Summarize the plan using the flow's LLM directly
+std::string PlanningFlow::_summarize_plan(const std::vector<Message> messages) {
     std::string plan_text = _get_plan_text();

+    std::string system_prompt = "You are a planning assistant. Your task is to summarize the current plan.";
+    std::string next_step_prompt = "Above is the nearest finished step in the plan. Here is the current plan status:\n\n" + plan_text + "\n\n"
+        + "Please provide a summary of what was accomplished and any thoughts for next steps (when the plan is not fully finished).";
+
     // Create a summary using the flow's LLM directly
     try {
-        std::string system_prompt = "You are a planning assistant. Your task is to summarize the completed plan.";
-
-        Message user_message = Message::user_message(
-            "The plan has been completed. Here is the final plan status:\n\n" + plan_text + "\n\n" +
-            "Please provide a summary of what was accomplished and any final thoughts."
-        );
-
         auto response = llm->ask(
-            {user_message},
-            system_prompt
+            messages,
+            system_prompt,
+            next_step_prompt
         );

         return response;
     } catch (const std::exception& e) {
-        LOG_ERROR("Error finalizing plan with LLM: " + std::string(e.what()));
+        LOG_ERROR("Error summarizing plan with LLM: " + std::string(e.what()));
         // Fallback to using an agent for the summary
         try {
             auto agent = primary_agent();
-            std::string summary_prompt = "\nThe plan has been completed. Here is the final plan status:\n\n";
-            summary_prompt += plan_text + "\n\n";
-            summary_prompt += "Please provide a summary of what was accomplished and any final thoughts.\n";
-            std::string summary = agent->run(summary_prompt);
-            return "Plan completed:\n\n" + summary;
+            std::string summary = agent->run(system_prompt + next_step_prompt);
+            return summary;
         } catch (const std::exception& e2) {
-            LOG_ERROR("Error finalizing plan with agent: " + std::string(e2.what()));
-            return "Plan completed. Error generating summary.";
+            LOG_ERROR("Error summarizing plan with agent: " + std::string(e2.what()));
+            return "Error generating summary.";
         }
     }
 }
@@ -74,8 +74,17 @@ struct PlanningFlow : public BaseFlow {
     // Generate plan text directly from storage if the planning tool fails.
     std::string _generate_plan_text_from_storage();

-    // Finalize the plan and provide a summary using the flow's LLM directly
-    std::string _finalize_plan();
+    // Summarize the plan using the flow's LLM directly
+    std::string _summarize_plan(const std::vector<Message> messages);
+
+    // Reset the flow to its initial state.
+    void reset(bool reset_memory = true) {
+        active_plan_id = "plan_" + std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
+        current_step_index = -1;
+        for (const auto& [key, agent] : agents) {
+            agent->reset(reset_memory);
+        }
+    }
 };

 }
llm.h (36 changes)
@@ -124,6 +124,9 @@ public:
                 formatted_messages[++j] = formatted_messages[i];
             } else {
                 formatted_messages[j]["content"] = concat_content(formatted_messages[j]["content"], formatted_messages[i]["content"]);
+                if (!formatted_messages[i]["tool_calls"].empty()) {
+                    formatted_messages[j]["tool_calls"] = concat_content(formatted_messages[j]["tool_calls"], formatted_messages[i]["tool_calls"]);
+                }
             }
         }

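`format_messages` merges consecutive same-role entries into one; previously any `tool_calls` carried by the later entry were silently dropped by that merge, so an assistant message's tool calls could vanish from the request. They are now concatenated the same way `content` is.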
@@ -136,6 +139,7 @@ public:
      * @brief Send a request to the LLM and get the reply
      * @param messages The conversation message list
      * @param system_prompt Optional system message
+     * @param next_step_prompt Optional prompt message for the next step
      * @param max_retries The maximum number of retries
      * @return The generated assistant content
      * @throws std::invalid_argument If the message is invalid or the reply is empty
@@ -144,6 +148,7 @@ public:
     std::string ask(
         const std::vector<Message>& messages,
         const std::string& system_prompt = "",
+        const std::string& next_step_prompt = "",
         int max_retries = 3
     ) {
         json formatted_messages = json::array();
@@ -158,6 +163,24 @@ public:
         json _formatted_messages = format_messages(messages);
         formatted_messages.insert(formatted_messages.end(), _formatted_messages.begin(), _formatted_messages.end());

+        if (!next_step_prompt.empty()) {
+            if (formatted_messages.empty() || formatted_messages.back()["role"] != "user") {
+                formatted_messages.push_back({
+                    {"role", "user"},
+                    {"content", next_step_prompt}
+                });
+            } else {
+                if (formatted_messages.back()["content"].is_string()) {
+                    formatted_messages.back()["content"] = formatted_messages.back()["content"].get<std::string>() + "\n\n" + next_step_prompt;
+                } else if (formatted_messages.back()["content"].is_array()) {
+                    formatted_messages.back()["content"].push_back({
+                        {"type", "text"},
+                        {"text", next_step_prompt}
+                    });
+                }
+            }
+        }
+
         json body = {
             {"model", llm_config_->model},
             {"messages", formatted_messages},
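The new `next_step_prompt` is a transient steering message: it is folded into the outgoing request — appended to the trailing user turn, or pushed as a fresh user turn — but never written into `Memory`. A minimal usage sketch, assuming an already-configured `llm` handle and the `Message` helpers from schema.h:

    // Sketch only: llm is assumed to be a ready instance exposing ask() as declared above.
    std::vector<Message> messages = {
        Message::user_message("Summarize the current plan.")
    };

    std::string reply = llm->ask(
        messages,
        "You are a planning assistant.",  // system_prompt
        "Focus on the remaining steps."   // next_step_prompt, merged at request time only
    );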
@@ -333,6 +356,19 @@ public:
             logger->info("Retrying " + std::to_string(retry) + "/" + std::to_string(max_retries));
         }

+        // If the logger has a file sink, log the request body
+        if (logger->sinks().size() > 1) {
+            auto file_sink = std::dynamic_pointer_cast<spdlog::sinks::basic_file_sink_mt>(logger->sinks()[1]);
+            if (file_sink) {
+                file_sink->log(spdlog::details::log_msg(
+                    spdlog::source_loc{},
+                    logger->name(),
+                    spdlog::level::debug,
+                    "Failed to get response from LLM. Full request body: " + body_str
+                ));
+            }
+        }
+
         throw std::runtime_error("Failed to get response from LLM");
     }
 };
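spdlog loggers expose their sink list via `sinks()`, so the oversized request body is written only to the second, file-backed sink (when one exists), keeping console output readable; if the `dynamic_pointer_cast` fails, the dump is skipped silently.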
main.cpp (94 changes)
@@ -44,62 +44,62 @@ int main() {
 #endif
     }

+    // Humanus agent = Humanus();
+    // while (true) {
+    //     if (agent.current_step == agent.max_steps) {
+    //         std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
+    //         std::cout << "Enter your prompt (enter en empty line to resume or 'exit' to quit): ";
+    //         agent.reset(false);
+    //     } else {
+    //         std::cout << "Enter your prompt (or 'exit' to quit): ";
+    //     }
+    //     std::string prompt;
+    //     std::getline(std::cin, prompt);
+    //     if (prompt == "exit") {
+    //         logger->info("Goodbye!");
+    //         break;
+    //     }
+    //     logger->info("Processing your request...");
+    //     agent.run(prompt);
+    // }
+
+    std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Humanus>();
+    std::map<std::string, std::shared_ptr<BaseAgent>> agents;
+    agents["default"] = agent_ptr;
+
+    auto flow = FlowFactory::create_flow(
+        FlowType::PLANNING,
+        nullptr, // llm
+        nullptr, // planning_tool
+        std::vector<std::string>{}, // executor_keys
+        "", // active_plan_id
+        agents, // agents
+        std::vector<std::shared_ptr<BaseTool>>{}, // tools
+        "default" // primary_agent_key
+    );
+
-    Humanus agent = Humanus();
     while (true) {
-        if (agent.current_step == agent.max_steps) {
-            std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl;
-            std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
-            agent.reset(false);
+        if (agent_ptr->current_step == agent_ptr->max_steps) {
+            std::cout << "Program automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
+            std::cout << "Enter your prompt (enter empty line to resume or 'exit' to quit): ";
+            agent_ptr->reset(false);
         } else {
             std::cout << "Enter your prompt (or 'exit' to quit): ";
         }

+        if (agent_ptr->state != AgentState::IDLE) {
+            break;
+        }
+
         std::string prompt;
         std::getline(std::cin, prompt);
         if (prompt == "exit") {
             logger->info("Goodbye!");
             break;
         }

-        logger->info("Processing your request...");
-        agent.run(prompt);
+        std::cout << "Processing your request..." << std::endl;
+        auto result = flow->execute(prompt);
+        std::cout << result << std::endl;
     }

-    // std::shared_ptr<BaseAgent> agent_ptr = std::make_shared<Humanus>();
-    // std::map<std::string, std::shared_ptr<BaseAgent>> agents;
-    // agents["default"] = agent_ptr;
-
-    // auto flow = FlowFactory::create_flow(
-    //     FlowType::PLANNING,
-    //     nullptr, // llm
-    //     nullptr, // planning_tool
-    //     std::vector<std::string>{}, // executor_keys
-    //     "", // active_plan_id
-    //     agents, // agents
-    //     std::vector<std::shared_ptr<BaseTool>>{}, // tools
-    //     "default" // primary_agent_key
-    // );
-
-    // while (true) {
-    //     if (agent_ptr->current_step == agent_ptr->max_steps) {
-    //         std::cout << "Automatically paused after " << agent_ptr->current_step << " steps." << std::endl;
-    //         std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): ";
-    //         agent_ptr->reset(false);
-    //     } else {
-    //         std::cout << "Enter your prompt (or 'exit' to quit): ";
-    //     }
-
-    //     if (agent_ptr->state != AgentState::IDLE) {
-    //         break;
-    //     }
-
-    //     std::string prompt;
-    //     std::getline(std::cin, prompt);
-    //     if (prompt == "exit") {
-    //         logger->info("Goodbye!");
-    //         break;
-    //     }
-
-    //     std::cout << "Processing your request..." << std::endl;
-    //     auto result = flow->execute(prompt);
-    //     std::cout << result << std::endl;
-    // }
 }
mcp (2 changes)
@@ -1 +1 @@
-Subproject commit 1e66845e8a4306903bd7201a746fc5816cf509b7
+Subproject commit 4536d45a9e18def8d88a41b95b4fa17bc5e574d6
@@ -16,7 +16,9 @@ const char* NEXT_STEP_PROMPT = R"(You can interact with the computer using python

 - puppeteer: Open, browse, and get screenshots of web pages using Puppeteer, a headless Chrome browser.

-Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.)";
+Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it.
+
+After using each tool, clearly explain the execution results and suggest the next steps. If you finish the current step, call `terminate` to switch to next step.)";
 } // namespace humanus

 namespace planning {
schema.h (16 changes)
@@ -19,7 +19,6 @@ enum class AgentState {
     ERR = 3 // Don't use ERROR
 };

-// Define the global map
 extern std::map<AgentState, std::string> agent_state_map;

 struct Function {
@@ -148,22 +147,11 @@ struct Message {
         return Message("system", content);
     }

-    static Message assistant_message(const json& content) {
-        return Message("assistant", content);
-    }
-
     static Message tool_message(const json& content, const std::string& tool_call_id = "", const std::string& name = "") {
         return Message("tool", content, name, tool_call_id);
     }

-    /**
-     * @brief Create ToolCallsMessage from raw tool calls.
-     * @param tool_calls Raw tool calls from LLM
-     * @param content Optional message content
-     * @param kwargs Other optional arguments
-     * @return Message with tool calls
-     */
-    static Message from_tool_calls(const std::vector<ToolCall>& tool_calls, const json& content = json::object()) {
+    static Message assistant_message(const json& content = "", const std::vector<ToolCall>& tool_calls = {}) {
         return Message("assistant", content, "", "", tool_calls);
     }
 };
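With `tool_calls` defaulting to an empty vector, a single factory now covers both shapes of assistant message — this is what lets `ToolCallAgent::think()` and `PlanningAgent::create_initial_plan()` above drop `Message::from_tool_calls` and its ternary. A usage sketch under the new signature (here `response` stands for the parsed LLM reply, as in the agent hunks):

    // Plain assistant reply: tool_calls defaults to {}.
    Message plain = Message::assistant_message("All steps are done.");

    // Assistant reply carrying tool calls (previously Message::from_tool_calls).
    std::vector<ToolCall> calls = ToolCall::from_json_list(response["tool_calls"]);
    Message with_calls = Message::assistant_message(response["content"], calls);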
@@ -177,7 +165,7 @@ struct Memory {
     // Add a message to the memory
     void add_message(const Message& message) {
         messages.push_back(message);
-        while (!messages.empty() && messages.size() > max_messages || messages.begin()->role == "assistant" || messages.begin()->role == "tool") {
+        while (!messages.empty() && (messages.size() > max_messages || messages.begin()->role == "assistant" || messages.begin()->role == "tool")) {
             // Ensure the first message is always a user or system message
             messages.erase(messages.begin());
         }
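This is the "fix bug" half of the commit message: `&&` binds tighter than `||`, so the old condition parsed as `(!messages.empty() && messages.size() > max_messages) || first-is-assistant || first-is-tool`, and once trimming emptied the container the `||` arms still dereferenced `messages.begin()`. A standalone illustration of the grouping (the container type below is an assumption; this hunk doesn't show the real member):

    #include <deque>
    #include <string>

    struct Msg { std::string role; };

    int main() {
        std::deque<Msg> messages;   // deliberately empty
        std::size_t max_messages = 10;

        // Old grouping: the role checks run even when messages is empty,
        // so messages.begin() would be dereferenced -- undefined behavior:
        //   bool bad = (!messages.empty() && messages.size() > max_messages)
        //              || messages.begin()->role == "assistant";

        // Fixed grouping: the emptiness check guards everything after it.
        bool trim = !messages.empty()
                    && (messages.size() > max_messages || messages.begin()->role == "assistant");
        return trim ? 1 : 0;
    }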
@@ -5,6 +5,7 @@

 namespace humanus {

+// https://github.com/modelcontextprotocol/servers/tree/HEAD/src/filesystem
 struct Filesystem : BaseTool {
     inline static const std::string name_ = "filesystem";
     inline static const std::string description_ = "## Features\n\n- Read/write files\n- Create/list/delete directories\n- Move files/directories\n- Search files\n- Get file metadata";
@@ -22,7 +22,7 @@ ToolResult PlanningTool::execute(const json& args) {
     std::string title = args.value("title", "");
     std::vector<std::string> steps = args.value("steps", std::vector<std::string>());
     int step_index = args.value("step_index", -1);
-    std::string step_status = args.value("step_status", "");
+    std::string step_status = args.value("step_status", "not_started");
     std::string step_notes = args.value("step_notes", "");

     if (command == "create") {
@@ -5,6 +5,7 @@

 namespace humanus {

+// https://github.com/modelcontextprotocol/servers/tree/HEAD/src/puppeteer
 struct Puppeteer : BaseTool {
     inline static const std::string name_ = "puppeteer";
     inline static const std::string description_ = "A Model Context Protocol server that provides browser automation capabilities using Puppeteer.";
@@ -105,7 +106,7 @@ struct Puppeteer : BaseTool {
             // Convert to OAI-complatible image_url format
             result["content"][i] = {
                 {"type", "image_url"},
-                {"image_url", {"url", "data:" + mimeType + ";base64," + data}}
+                {"image_url", {{"url", "data:" + mimeType + ";base64," + data}}}
             };
         }
     }
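The extra braces matter because of nlohmann::json's initializer-list rules: `{"url", value}` builds a two-element array, while `{{"url", value}}` builds an object with one key — and the OpenAI-style API expects `image_url` to be an object carrying a `url` field. A minimal demonstration:

    #include <string>
    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main() {
        std::string data = "data:image/png;base64,....";

        json as_array  = {"url", data};    // -> ["url", "data:image/png;base64,...."]
        json as_object = {{"url", data}};  // -> {"url": "data:image/png;base64,...."}

        return (as_array.is_array() && as_object.is_object()) ? 0 : 1;
    }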
tool/shell.h (40 changes)
@@ -10,6 +10,7 @@
 namespace humanus {

 // A tool for executing shell commands using MCP shell server
+// https://github.com/kevinwatt/shell-mcp.git
 struct Shell : BaseTool {
     inline static const std::string name_ = "shell";
     inline static const std::string description_ = "Execute a shell command in the terminal.";
@@ -40,7 +41,7 @@ struct Shell : BaseTool {
                     "nslookup",
                     "ip",
                     "whereis"
-                ],
+                ]
             },
             "args": {
                 "type": "array",
@@ -54,6 +55,29 @@ struct Shell : BaseTool {
         "required": ["command"]
     })json");

+    inline static std::set<std::string> allowed_commands = {
+        "ls",
+        "cat",
+        "pwd",
+        "df",
+        // "echo", // Not working now
+        "ps",
+        "free",
+        "uptime",
+        "date",
+        "grep",
+        "w",
+        "whois",
+        "find",
+        "netstat",
+        "lspci",
+        "lsusb",
+        "dig",
+        "nslookup",
+        "ip",
+        "whereis"
+    };
+
     Shell() : BaseTool(name_, description_, parameters_) {}

     ToolResult execute(const json& args) override {
@@ -63,7 +87,6 @@ struct Shell : BaseTool {
             return ToolError("Failed to initialize shell client");
         }

-        // Handle command parameters
         std::string command;
         if (args.contains("command")) {
             if (args["command"].is_string()) {
@@ -72,14 +95,19 @@ struct Shell : BaseTool {
                 return ToolError("Invalid command format");
             }
         } else {
-            return ToolError("Command is required");
+            return ToolError("'command' is required");
         }

-        json tool_args = args;
-        tool_args.erase("command");
+        if (allowed_commands.find(command) == allowed_commands.end()) {
+            return ToolError("Unknown command '" + command + "'. Please use one of the following commands: " +
+                std::accumulate(allowed_commands.begin(), allowed_commands.end(), std::string(),
+                    [](const std::string& a, const std::string& b) {
+                        return a + (a.empty() ? "" : ", ") + b;
+                    }));
+        }

         // Call shell tool
-        json result = _client->call_tool("shell_" + command, tool_args);
+        json result = _client->call_tool("shell_" + command, args);

         bool is_error = result.value("isError", false);

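The `std::accumulate` call is a plain comma-join over the whitelist (it relies on `<numeric>`); since `std::set` iterates in sorted order, the error message lists the commands alphabetically. A standalone sketch of the text it produces:

    #include <iostream>
    #include <numeric>
    #include <set>
    #include <string>

    int main() {
        std::set<std::string> allowed_commands = {"pwd", "ls", "cat"};

        // Same fold as in Shell::execute: "", "cat", "cat, ls", "cat, ls, pwd".
        std::string joined = std::accumulate(
            allowed_commands.begin(), allowed_commands.end(), std::string(),
            [](const std::string& a, const std::string& b) {
                return a + (a.empty() ? "" : ", ") + b;
            });

        std::cout << "Unknown command 'rm'. Please use one of the following commands: "
                  << joined << std::endl;   // cat, ls, pwd
        return 0;
    }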