diff --git a/.gitignore b/.gitignore
index fc17a52..f18ceeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -80,4 +80,8 @@ examples/server/webui/dist
 /.venv
 __pycache__/
 */poetry.lock
-poetry.toml
\ No newline at end of file
+poetry.toml
+
+# Configs
+
+config/config*.toml
\ No newline at end of file
diff --git a/README.md b/README.md
index b975868..89a6ea5 100644
--- a/README.md
+++ b/README.md
@@ -21,29 +21,39 @@ cmake --build build --config Release
 ## How to Run
 
+### Configuration
+
 Switch to your own configration first:
-1. Replace `base_url`, `api_key`, .etc in `config/config_llm.toml` according to your need.
-2. Fill in `args` after `"@modelcontextprotocol/server-filesystem"` for `filesystem` to control the access to files. For example:
+1. Copy configuration files from `config/example` to `config`.
+2. Replace `base_url`, `api_key`, etc. in `config/config_llm.toml` and other configurations in `config/config*.toml` according to your needs.
+   > Note: `llama-server` in [llama.cpp](https://github.com/ggml-org/llama.cpp) also supports embedding models.
+3. Fill in `args` after `"@modelcontextprotocol/server-filesystem"` for `filesystem` to control the access to files. For example:
 ```
 [filesystem]
 type = "stdio"
 command = "npx"
 args = ["-y",
     "@modelcontextprotocol/server-filesystem",
-    "/Users/{username}/Desktop",
+    "/Users/{Username}/Desktop",
     "other/path/to/your/files]
 ```
 
-Start a MCP server with tool `python_execute` on port 8818:
+### `mcp_server`
+
+(For tools, only `python_execute` is provided as an example for now.)
+
+Start an MCP server with the tool `python_execute` on port 8895 (or pass the port as an argument):
 ```bash
-./build/bin/mcp_server # Unix/MacOS
+./build/bin/mcp_server # Unix/MacOS
 ```
 
 ```shell
-.\build\bin\Release\mcp_server.exe # Windows
+.\build\bin\Release\mcp_server.exe # Windows
 ```
 
-Run agent `humanus` with tools `python_execute`, `filesystem` and `playwright` (for browser use):
+### `humanus_cli`
+
+Run with tools `python_execute`, `filesystem` and `playwright` (for browser use):
 ```bash
 ./build/bin/humanus_cli # Unix/MacOS
 ```
@@ -53,7 +63,9 @@ Run agent `humanus` with tools `python_execute`, `filesystem` and `playwright` (
 .\build\bin\Release\humanus_cli.exe # Windows
 ```
 
-Run experimental planning flow (only agent `humanus` as executor):
+### `humanus_cli_plan` (WIP)
+
+Run the planning flow (only agent `humanus` as executor):
 ```bash
 ./build/bin/humanus_cli_plan # Unix/MacOS
 ```
@@ -62,6 +74,43 @@ Run experimental planning flow (only agent `humanus` as executor):
 .\build\bin\Release\humanus_cli_plan.exe # Windows
 ```
+### `humanus_server` (WIP)
+
+Run agents in the MCP server (running on port 8896 by default):
+- `humanus_initialze`: Pass a JSON configuration (like the one in `config/config.toml`) to initialize an agent for a session. (Only one agent is maintained per session/client.)
+- `humanus_run`: Pass `prompt` to tell the agent what to do. (Only one task runs at a time.)
+- `humanus_terminate`: Stop the current task.
+- `humanus_status`: Get the current state and other information about the agent and the task. Returns:
+  - `state`: Agent state.
+  - `current_step`: Current step index of the agent.
+  - `max_steps`: Maximum number of steps to execute without user interaction.
+  - `prompt_tokens`: Prompt (input) token consumption.
+  - `completion_tokens`: Completion (output) token consumption.
+  - `log_buffer`: Buffered logs, as in `humanus_cli`. Cleared after being fetched.
+  - `result`: A summary of what the agent did. Non-empty once the task is finished.
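Taken together, the four tools above imply a simple client loop: initialize once per session, start a task, then poll `humanus_status` until `result` is non-empty. A minimal sketch of that sequence is shown below; the `tool_caller` stand-in for the actual MCP client transport, the config argument, and the one-second polling interval are illustrative assumptions, not part of this patch.

```cpp
#include <nlohmann/json.hpp>
#include <chrono>
#include <functional>
#include <iostream>
#include <string>
#include <thread>

using json = nlohmann::json;

// Stand-in for however your MCP client invokes a tool on humanus_server.
using tool_caller = std::function<json(const std::string& name, const json& args)>;

// Sketch of the intended sequence: initialize once per session, start a task
// with humanus_run, then poll humanus_status until `result` is non-empty.
std::string run_task(tool_caller call, const json& agent_config, const std::string& prompt) {
    call("humanus_initialze", agent_config);   // tool name as listed above
    call("humanus_run", {{"prompt", prompt}}); // only one task runs at a time

    while (true) {
        json status = call("humanus_status", json::object());
        std::string result = status.value("result", std::string{});
        if (!result.empty()) {
            return result; // task finished
        }
        std::cout << status.value("state", std::string{}) << " step "
                  << status.value("current_step", 0) << "/"
                  << status.value("max_steps", 0) << std::endl;
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
}
```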
+
+```bash
+./build/bin/humanus_server # Unix/MacOS
+```
+
+```shell
+.\build\bin\Release\humanus_server.exe # Windows
+```
+
+Configure it in Cursor:
+```json
+{
+  "mcpServers": {
+    "humanus": {
+      "url": "http://localhost:8896/sse"
+    }
+  }
+}
+```
+
+> What if we added `humanus` itself to `mcp_servers`? It might be interesting.
+
+
 ## Acknowledgement

@@ -72,7 +121,7 @@ Run experimental planning flow (only agent `humanus` as executor): ## Cite ``` -@misc{humanuscpp, +@misc{humanus_cpp, author = {Zihong Zhang and Zuchao Li}, title = {humanus.cpp: A Lightweight C++ Framework for Local LLM Agents}, year = {2025} diff --git a/config/config.toml b/config/config.toml index 73fefba..4ebd83e 100644 --- a/config/config.toml +++ b/config/config.toml @@ -1,5 +1,13 @@ [humanus_cli] -llm = "qwen-max-latest" +llm = "qwen-max-latest" # Key in config_llm.toml +memory = "long-context" # Key in config_mem.toml +tools = ["filesystem", "playwright", "image_loader"] # Builtin tools configuration +mcp_servers = ["python_execute"] # Key in config_mcp.toml, all MCP tools provided by servers will be added +max_steps = 30 # Maximum automatic steps without user's check +duplicate_threshold = 2 # Used to detect repeating condition (will be checked by LCS) + +[humanus_plan] +llm = "deepseek-chat" memory = "long-context" tools = ["filesystem", "playwright", "image_loader"] mcp_servers = ["python_execute"] diff --git a/config/config_embd.toml b/config/config_embd.toml index 4c725eb..dc4b8bc 100644 --- a/config/config_embd.toml +++ b/config/config_embd.toml @@ -1,11 +1,11 @@ ["nomic-embed-text-v1.5"] -provider = "oai" -base_url = "http://localhost:8080" -endpoint = "/v1/embeddings" -model = "nomic-embed-text-v1.5.f16.gguf" -api_key = "" -embeddings_dim = 768 -max_retries = 3 +provider = "oai" # Only support OAI-Compatible style for now +base_url = "http://localhost:8080" # Base url. Note: Don't add any endpoint behind +endpoint = "/v1/embeddings" # Endpoint of embeddings +model = "nomic-embed-text-v1.5.f16.gguf" # Model name +api_key = "" # Your API Key +embeddings_dim = 768 # Dimension of embeddings (refer to API docs) +max_retries = 3 # Maximum retry count [qwen-text-embedding-v3] provider = "oai" diff --git a/config/config_llm.toml b/config/config_llm.toml index 52a44b0..d58a2e6 100644 --- a/config/config_llm.toml +++ b/config/config_llm.toml @@ -1,3 +1,9 @@ +[qwen-max] +model = "qwen-max" # Model name +base_url = "https://dashscope.aliyuncs.com" # Base url. 
Note: Don't add any endpoint behind +endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions +api_key = "sk-cb1bb2a240d84182bb93f6dd0fe03600" # Your API Key + [qwen-max-latest] model = "qwen-max-latest" base_url = "https://dashscope.aliyuncs.com" @@ -9,7 +15,7 @@ model = "qwen-vl-max-latest" base_url = "https://dashscope.aliyuncs.com" endpoint = "/compatible-mode/v1/chat/completions" api_key = "sk-cb1bb2a240d84182bb93f6dd0fe03600" -enable_vision = true +enable_vision = true # This means the model could accept content item like {"image_url", {"url", "xxx"}} ["claude-3.5-sonnet"] model = "anthropic/claude-3.5-sonnet" diff --git a/config/config_mcp.toml b/config/config_mcp.toml index f4a6bda..51130ec 100644 --- a/config/config_mcp.toml +++ b/config/config_mcp.toml @@ -3,6 +3,7 @@ type = "sse" host = "localhost" port = 8895 sse_endpoint = "/sse" +message_enpoint = "/message" [puppeteer] type = "stdio" @@ -19,4 +20,4 @@ type = "stdio" command = "npx" args = ["-y", "@modelcontextprotocol/server-filesystem", - "/Users/hyde/Desktop"] \ No newline at end of file + "/Users/hyde/Desktop"] # Allowed paths \ No newline at end of file diff --git a/config/config_mem.toml b/config/config_mem.toml index f91ba7c..f4e6513 100644 --- a/config/config_mem.toml +++ b/config/config_mem.toml @@ -1,12 +1,12 @@ [default] -max_messages = 16 -max_tokens_message = 32768 -max_tokens_messages = 65536 -max_tokens_context = 131072 -retrieval_limit = 32 -embedding_model = "qwen-text-embedding-v3" -vector_store = "hnswlib" -llm = "qwen-max-latest" +max_messages = 16 # Maximum number of messages in short-term memory +max_tokens_message = 32768 # Maximum number of tokens in single message +max_tokens_messages = 65536 # Maximum number of tokens in short-term memory +max_tokens_context = 131072 # Maximum number of tokens in context (used by `get_messages`) +retrieval_limit = 32 # Maximum number of results to retrive from long-term memory +embedding_model = "qwen-text-embedding-v3" # Key in config_embd.toml +vector_store = "hnswlib" # Key in config_vec.toml +llm = "qwen-max-latest" # Key in config_llm.toml [long-context] max_messages = 32 diff --git a/config/example/config.toml b/config/example/config.toml new file mode 100644 index 0000000..69bb88c --- /dev/null +++ b/config/example/config.toml @@ -0,0 +1,15 @@ +[humanus_cli] +llm = "qwen-max-latest" # Key in config_llm.toml +memory = "default" # Key in config_mem.toml +tools = ["filesystem", "playwright", "image_loader"] # Builtin tools configuration +mcp_servers = ["python_execute"] # Key in config_mcp.toml, all MCP tools provided by servers will be added +max_steps = 30 # Maximum automatic steps without user's check +duplicate_threshold = 2 # Used to detect repeating condition (will be checked by LCS) + +[humanus_plan] +llm = "deepseek-chat" +memory = "long-context" +tools = ["filesystem", "playwright", "image_loader"] +mcp_servers = ["python_execute"] +max_steps = 30 +duplicate_threshold = 2 \ No newline at end of file diff --git a/config/example/config_embd.toml b/config/example/config_embd.toml new file mode 100644 index 0000000..fae3205 --- /dev/null +++ b/config/example/config_embd.toml @@ -0,0 +1,17 @@ +["nomic-embed-text-v1.5"] +provider = "oai" # Only support OAI-Compatible style for now +base_url = "http://localhost:8080" # Base url. 
Note: Don't add any endpoint behind +endpoint = "/v1/embeddings" # Endpoint of embeddings +model = "nomic-embed-text-v1.5.f16.gguf" # Model name +api_key = "" # Your API Key +embeddings_dim = 768 # Dimension of embeddings (refer to API docs) +max_retries = 3 # Maximum retry count + +[qwen-text-embedding-v3] +provider = "oai" +base_url = "https://dashscope.aliyuncs.com" +endpoint = "/compatible-mode/v1/embeddings" +model = "text-embedding-v3" +api_key = "sk-" +embeddings_dim = 1024 +max_retries = 3 \ No newline at end of file diff --git a/config/example/config_llm.toml b/config/example/config_llm.toml new file mode 100644 index 0000000..83a04c0 --- /dev/null +++ b/config/example/config_llm.toml @@ -0,0 +1,45 @@ +[qwen-max] +model = "qwen-max" # Model name +base_url = "https://dashscope.aliyuncs.com" # Base url. Note: Don't add any endpoint behind +endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions +api_key = "sk-" # Your API Key + +[qwen-max-latest] +model = "qwen-max-latest" # Model name +base_url = "https://dashscope.aliyuncs.com" # Base url. Note: Don't add any endpoint behind +endpoint = "/compatible-mode/v1/chat/completions" # Endpoint of chat completions +api_key = "sk-" # Your API Key + +[qwen-vl-max-latest] +model = "qwen-vl-max-latest" +base_url = "https://dashscope.aliyuncs.com" +endpoint = "/compatible-mode/v1/chat/completions" +api_key = "sk-" +enable_vision = true # This means the model could accept content item like {"image_url", {"url", "xxx"}} + +["claude-3.5-sonnet"] +model = "anthropic/claude-3.5-sonnet" +base_url = "https://openrouter.ai" +endpoint = "/api/v1/chat/completions" +api_key = "sk-" +enable_vision = true + +["claude-3.7-sonnet"] +model = "anthropic/claude-3.7-sonnet" +base_url = "https://openrouter.ai" +endpoint = "/api/v1/chat/completions" +api_key = "sk-" +enable_vision = true + +[deepseek-chat] +model = "deepseek-chat" +base_url = "https://api.deepseek.com" +endpoint = "/v1/chat/completions" +api_key = "sk-" + +[deepseek-r1] +model = "deepseek-reasoner" +base_url = "https://api.deepseek.com" +endpoint = "/v1/chat/completions" +api_key = "sk-" +enable_tool = false # The API provider does not support tool use. Use builtin tool hint template. 
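All of the `config_llm.toml` entries above share the same shape, so loading one reduces to reading a handful of keys. Below is a minimal sketch, assuming the toml++ parser that the `as_table()` usage in `examples/main/main.cpp` suggests; the `LLMSettings` struct, the chosen section key, and the defaults are illustrative, not the project's actual loader. The same pattern applies to `config_embd.toml` and `config_mem.toml`.

```cpp
#include <toml++/toml.hpp> // toml++ single header (older releases ship it as toml.h)
#include <iostream>
#include <string>

// Illustrative only: mirrors the keys used by the LLM sections above.
struct LLMSettings {
    std::string model, base_url, endpoint, api_key;
    bool enable_vision = false; // accepts {"type": "image_url", ...} content items
    bool enable_tool   = true;  // false -> fall back to the builtin tool hint template
};

int main() {
    auto config = toml::parse_file("config/config_llm.toml");

    const auto* section = config["qwen-max-latest"].as_table(); // pick any section key
    if (!section) {
        std::cerr << "section not found\n";
        return 1;
    }

    LLMSettings llm;
    llm.model         = (*section)["model"].value_or(std::string{});
    llm.base_url      = (*section)["base_url"].value_or(std::string{}); // no endpoint appended here
    llm.endpoint      = (*section)["endpoint"].value_or(std::string{"/v1/chat/completions"});
    llm.api_key       = (*section)["api_key"].value_or(std::string{});
    llm.enable_vision = (*section)["enable_vision"].value_or(false);
    llm.enable_tool   = (*section)["enable_tool"].value_or(true);

    std::cout << llm.model << " -> " << llm.base_url << llm.endpoint << std::endl;
    return 0;
}
```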
diff --git a/config/example/config_mcp.toml b/config/example/config_mcp.toml new file mode 100644 index 0000000..253797f --- /dev/null +++ b/config/example/config_mcp.toml @@ -0,0 +1,23 @@ +[python_execute] +type = "sse" +host = "localhost" +port = 8895 +sse_endpoint = "/sse" +message_enpoint = "/message" + +[puppeteer] +type = "stdio" +command = "npx" +args = ["-y", "@modelcontextprotocol/server-puppeteer"] + +[playwright] +type = "stdio" +command = "npx" +args = ["-y", "@executeautomation/playwright-mcp-server"] + +[filesystem] +type = "stdio" +command = "npx" +args = ["-y", + "@modelcontextprotocol/server-filesystem", + "/Users/{Username}/Desktop"] # Allowed paths \ No newline at end of file diff --git a/config/example/config_mem.toml b/config/example/config_mem.toml new file mode 100644 index 0000000..f4e6513 --- /dev/null +++ b/config/example/config_mem.toml @@ -0,0 +1,19 @@ +[default] +max_messages = 16 # Maximum number of messages in short-term memory +max_tokens_message = 32768 # Maximum number of tokens in single message +max_tokens_messages = 65536 # Maximum number of tokens in short-term memory +max_tokens_context = 131072 # Maximum number of tokens in context (used by `get_messages`) +retrieval_limit = 32 # Maximum number of results to retrive from long-term memory +embedding_model = "qwen-text-embedding-v3" # Key in config_embd.toml +vector_store = "hnswlib" # Key in config_vec.toml +llm = "qwen-max-latest" # Key in config_llm.toml + +[long-context] +max_messages = 32 +max_tokens_message = 64000 +max_tokens_messages = 128000 +max_tokens_context = 128000 +retrieval_limit = 32 +embedding_model = "qwen-text-embedding-v3" +vector_store = "hnswlib" +llm = "qwen-max-latest" \ No newline at end of file diff --git a/config/example/config_vec.toml b/config/example/config_vec.toml new file mode 100644 index 0000000..4b14f07 --- /dev/null +++ b/config/example/config_vec.toml @@ -0,0 +1,8 @@ +[hnswlib] +provider = "hnswlib" +dim = 768 # Dimension of the elements +max_elements = 100 # Maximum number of elements, should be known beforehand +M = 16 # Tightly connected with internal dimensionality of the data + # strongly affects the memory consumption +ef_construction = 200 # Controls index search speed/build speed tradeoff +metric = "L2" # Distance metric to use, can be L2 or IP \ No newline at end of file diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 7247696..23cdd49 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -56,14 +56,14 @@ int main() { const auto& config_table = *config_data["humanus_cli"].as_table(); - Humanus agent = Humanus::load_from_toml(config_table); + auto agent = std::make_shared(Humanus::load_from_toml(config_table)); while (true) { - if (agent.current_step == agent.max_steps) { - std::cout << "Automatically paused after " << agent.max_steps << " steps." << std::endl; + if (agent->current_step == agent->max_steps) { + std::cout << "Automatically paused after " << agent->max_steps << " steps." 
<< std::endl; std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): "; std::cout.flush(); - agent.reset(false); + agent->reset(false); } else { std::cout << "Enter your prompt (or 'exit' to quit): "; std::cout.flush(); @@ -77,7 +77,7 @@ int main() { } logger->info("Processing your request: " + prompt); - agent.run(prompt); + agent->run(prompt); } return 0; diff --git a/examples/plan/humanus_plan.cpp b/examples/plan/humanus_plan.cpp index 49f95ad..a6147b3 100644 --- a/examples/plan/humanus_plan.cpp +++ b/examples/plan/humanus_plan.cpp @@ -57,30 +57,26 @@ int main() { const auto& config_table = *config_data["humanus_plan"].as_table(); - Humanus agent = Humanus::load_from_toml(config_table); + auto agent = std::make_shared(Humanus::load_from_toml(config_table)); std::map> agents; - agents["default"] = std::make_shared(agent); + agents["default"] = agent; auto flow = FlowFactory::create_flow( FlowType::PLANNING, - nullptr, // llm - nullptr, // planning_tool - std::vector{}, // executor_keys - "", // active_plan_id - agents, // agents - std::vector>{}, // tools - "default" // primary_agent_key + agent->llm, + agents, + "default" // primary_agent_key ); while (true) { - if (agent.current_step == agent.max_steps) { - std::cout << "Automatically paused after " << agent.current_step << " steps." << std::endl; + if (agent->current_step == agent->max_steps) { + std::cout << "Automatically paused after " << agent->current_step << " steps." << std::endl; std::cout << "Enter your prompt (enter an empty line to resume or 'exit' to quit): "; - agent.reset(false); - } else if (agent.state != AgentState::IDLE) { + agent->reset(false); + } else if (agent->state != AgentState::IDLE) { std::cout << "Enter your prompt (enter an empty line to retry or 'exit' to quit): "; - agent.reset(false); + agent->reset(false); } else { std::cout << "Enter your prompt (or 'exit' to quit): "; } @@ -94,6 +90,6 @@ int main() { logger->info("Processing your request: " + prompt); auto result = flow->execute(prompt); - logger->info("🌟 " + agent.name + "'s summary: " + result); + logger->info("🌟 " + agent->name + "'s summary: " + result); } } \ No newline at end of file diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 156fc02..c974768 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -169,6 +169,8 @@ int main(int argc, char** argv) { agent->reset(); + session_manager->clear_result(session_id); + std::thread([agent, session_manager, prompt, session_id]() { try { session_sink->set_session_id(session_id); @@ -234,7 +236,7 @@ int main(int argc, char** argv) { {"max_steps", agent->max_steps}, {"prompt_tokens", agent->get_prompt_tokens()}, {"completion_tokens", agent->get_completion_tokens()}, - {"logs_buffer", session_sink->get_buffer(session_id)}, + {"log_buffer", session_sink->get_buffer(session_id)}, {"result", result} }; diff --git a/flow/base.h b/flow/base.h index 0689c4b..70e469e 100644 --- a/flow/base.h +++ b/flow/base.h @@ -17,19 +17,18 @@ const std::map FLOW_TYPE_MAP = { // Base class for execution flows supporting multiple agents struct BaseFlow { std::map> agents; - std::vector> tools; std::string primary_agent_key; - BaseFlow(const std::map>& agents = {}, const std::vector>& tools = {}, const std::string& primary_agent_key = "") - : agents(agents), tools(tools), primary_agent_key(primary_agent_key) { + BaseFlow(const std::map>& agents = {}, const std::string& primary_agent_key = "") + : agents(agents), primary_agent_key(primary_agent_key) { // 
If primary agent not specified, use first agent if (primary_agent_key.empty() && !agents.empty()) { this->primary_agent_key = agents.begin()->first; } } - BaseFlow(const std::shared_ptr& agent, const std::vector>& tools = {}, const std::string& primary_agent_key = "") - : tools(tools), primary_agent_key(primary_agent_key) { + BaseFlow(const std::shared_ptr& agent, const std::string& primary_agent_key = "") + : primary_agent_key(primary_agent_key) { agents["default"] = agent; // If primary agent not specified, use first agent if (primary_agent_key.empty()) { @@ -37,8 +36,8 @@ struct BaseFlow { } } - BaseFlow(const std::vector>& agents_list, const std::vector>& tools = {}, const std::string& primary_agent_key = "") - : tools(tools), primary_agent_key(primary_agent_key) { + BaseFlow(const std::vector>& agents_list, const std::string& primary_agent_key = "") + : primary_agent_key(primary_agent_key) { for (size_t i = 0; i < agents_list.size(); i++) { agents["agent_" + std::to_string(i)] = agents_list[i]; } diff --git a/flow/planning.cpp b/flow/planning.cpp index e9e40f5..40f88cc 100644 --- a/flow/planning.cpp +++ b/flow/planning.cpp @@ -10,13 +10,6 @@ std::shared_ptr PlanningFlow::get_executor(const std::string& step_ty return agents.at(step_type); } - // Otherwise use the first available executor or fall back to primary agent - for (const auto& key : executor_keys) { - if (agents.find(key) != agents.end()) { - return agents.at(key); - } - } - // Fallback to primary agent return primary_agent(); } @@ -72,7 +65,7 @@ std::string PlanningFlow::execute(const std::string& input) { result += "##" + step_info.value("type", "Step " + std::to_string(current_step_index)) + ":\n" + prefix_sum + "\n\n"; } - reset(true); // Clear short-termmemory and state for next plan + reset(true); // Clear short-term memory and state for next plan return result; } catch (const std::exception& e) { @@ -140,10 +133,15 @@ void PlanningFlow::_create_initial_plan(const std::string& request) { logger->warn("Creating default plan"); // Create default plan using the ToolCollection + auto title = request; + if (title.size() > 50) { + title = title.substr(0, validate_utf8(title.substr(0, 50))) + "..."; + } + planning_tool->execute({ {"command", "create"}, {"plan_id", active_plan_id}, - {"title", request.substr(0, std::min(50, static_cast(request.size()))) + (request.size() > 50 ? "..." 
: "")}, + {"title", title}, {"steps", {"Analyze request", "Execute task", "Verify results"}} }); } diff --git a/flow/planning.h b/flow/planning.h index 0974b54..6b4ea98 100644 --- a/flow/planning.h +++ b/flow/planning.h @@ -16,37 +16,20 @@ namespace humanus { struct PlanningFlow : public BaseFlow { std::shared_ptr llm; std::shared_ptr planning_tool; - std::vector executor_keys; std::string active_plan_id; - int current_step_index = -1; + int current_step_index; PlanningFlow( const std::shared_ptr& llm = nullptr, - const std::shared_ptr& planning_tool = nullptr, - const std::vector& executor_keys = {}, - const std::string& active_plan_id = "", const std::map>& agents = {}, - const std::vector>& tools = {}, - const std::string& primary_agent_key = "" - ) : BaseFlow(agents, tools, primary_agent_key), - llm(llm), - planning_tool(planning_tool), - executor_keys(executor_keys), - active_plan_id(active_plan_id) { + const std::string& primary_agent_key = "default" + ) : BaseFlow(agents, primary_agent_key), + llm(llm) { if (!llm) { - this->llm = LLM::get_instance("default"); - } - if (!planning_tool) { - this->planning_tool = std::make_shared(); - } - if (active_plan_id.empty()) { - this->active_plan_id = "plan_" + std::to_string(std::chrono::system_clock::now().time_since_epoch().count()); - } - if (executor_keys.empty()) { - for (const auto& [key, agent] : agents) { - this->executor_keys.push_back(key); - } + this->llm = primary_agent()->llm; } + planning_tool = std::make_shared(); + reset(); } // Get an appropriate executor agent for the current step. diff --git a/include/utils.h b/include/utils.h index 3e4ef09..aaf8011 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,6 +1,7 @@ #ifndef HUMANUS_UTILS_H #define HUMANUS_UTILS_H +#include "mcp_message.h" #include #include @@ -14,6 +15,8 @@ namespace humanus { +using json = mcp::json; + // Get project root directory inline std::filesystem::path get_project_root() { return std::filesystem::path(__FILE__).parent_path().parent_path(); @@ -28,6 +31,9 @@ size_t validate_utf8(const std::string& text); bool readline_utf8(std::string & line, bool multiline_input = false); +// Parse the content of a message to a string +std::string parse_json_content(const json& content); + } // namespace humanus #endif diff --git a/src/llm.cpp b/src/llm.cpp index 77bd70a..e48aff7 100644 --- a/src/llm.cpp +++ b/src/llm.cpp @@ -78,6 +78,12 @@ json LLM::format_messages(const std::vector& messages) { formatted_messages.erase(formatted_messages.begin() + j + 1, formatted_messages.end()); + if (!llm_config_->enable_vision) { + for (auto& message : formatted_messages) { + message["content"] = parse_json_content(message["content"]); // Images will be replaced by [image1], [image2], ... 
+ } + } + return formatted_messages; } diff --git a/src/utils.cpp b/src/utils.cpp index 1bac764..254c2d6 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -67,4 +67,24 @@ bool readline_utf8(std::string & line, bool multiline_input) { return multiline_input; } +// Parse the content of a message to a string +std::string parse_json_content(const json& content) { + if (content.is_string()) { + return content.get(); + } else if (content.is_array()) { + std::string result; + int image_cnt = 0; + for (const auto& item : content) { + if (item["type"] == "text") { + result += item["text"].get(); + } else if (item["type"] == "image_url") { + result += "[image" + std::to_string(++image_cnt) + "]"; + } + } + return result; + } else { + return content.dump(2); + } +} + } // namespace humanus \ No newline at end of file diff --git a/tool/base.h b/tool/base.h index 2d9699c..215caa4 100644 --- a/tool/base.h +++ b/tool/base.h @@ -3,6 +3,7 @@ #include "schema.h" #include "config.h" +#include "utils.h" #include "mcp_stdio_client.h" #include "mcp_sse_client.h" #include @@ -51,24 +52,6 @@ struct ToolResult { }; } - static std::string parse_json_content(const json& content) { - if (content.is_string()) { - return content.get(); - } else if (content.is_array()) { - std::string result; - for (const auto& item : content) { - if (item["type"] == "text") { - result += item["text"].get(); - } else if (item["type"] == "image_url") { - result += "" + item["image_url"]["url"].get() + ""; - } - } - return result; - } else { - return content.dump(2); - } - } - std::string to_string(int max_length = -1) const { std::string result; if (!error.empty()) {
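For reference, the relocated `parse_json_content` (now in `src/utils.cpp` above) flattens a multimodal `content` array into plain text, which is what `format_messages` relies on for non-vision models. A small usage sketch, assuming `mcp::json` behaves like nlohmann::json as the `using json = mcp::json;` alias in `include/utils.h` suggests:

```cpp
#include "utils.h" // humanus::parse_json_content and the json alias
#include <cassert>
#include <iostream>

int main() {
    using namespace humanus;

    // Mixed text + image content, as produced for vision-enabled models.
    json content = json::array({
        {{"type", "text"}, {"text", "What is in this picture? "}},
        {{"type", "image_url"}, {"image_url", {{"url", "data:image/png;base64,..."}}}}
    });

    // Non-vision models receive a flat string; images collapse to placeholders.
    std::cout << parse_json_content(content) << std::endl; // "What is in this picture? [image1]"

    // Plain string content passes through unchanged.
    assert(parse_json_content(json("hello")) == "hello");
    return 0;
}
```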