#ifndef HUMANUS_TOOL_CONTENT_PROVIDER_H #define HUMANUS_TOOL_CONTENT_PROVIDER_H #include "base.h" #include "utils.h" #include #include namespace humanus { struct ContentProvider : BaseTool { inline static const std::string name_ = "content_provider"; inline static const std::string description_ = "Use this tool to save temporary content for later use. For example, you can save a large code file (like HTML) and read it by chunks later."; inline static const json parameters_ = json::parse(R"json( { "type": "object", "properties": { "operation": { "type": "string", "description": "The operation to perform: `write` to save content, `read` to retrieve content", "enum": ["write", "read"] }, "content": { "type": "array", "description": "The content to store. Required when operation is `write`. Format: [{`type`: `text`, `text`: `content`}, {`type`: `image`, `image_url`: {`url`: `image_url`}}]", "items": { "type": "object", "properties": { "type": { "type": "string", "enum": ["text", "image"] }, "text": { "type": "string", "description": "Text content. Required when type is `text`." }, "image_url": { "type": "object", "description": "Image URL information. Required when type is `image`.", "properties": { "url": { "type": "string", "description": "URL of the image" } } } } } }, "cursor": { "type": "string", "description": "The cursor position for reading content. Required when operation is `read`. Use `start` for the beginning or the cursor returned from a previous read." }, "max_chunk_size": { "type": "integer", "description": "Maximum size in characters for each text chunk. Default is 4000.", "default": 4000 } }, "required": ["operation"] } )json"); inline static std::map> content_store_; inline static size_t MAX_STORE_ID = 100; inline static size_t current_id_ = 0; ContentProvider() : BaseTool(name_, description_, parameters_) {} // 将文本分割成合适大小的块 std::vector split_text_into_chunks(const std::string& text, int max_chunk_size) { std::vector chunks; // 如果文本为空，返回空数组 if (text.empty()) { return chunks; } size_t text_length = text.length(); size_t offset = 0; while (offset < text_length) { // 首先确定最大可能的块大小 size_t raw_chunk_size = std::min(static_cast(max_chunk_size), text_length - offset); // 使用 validate_utf8 确保不会截断 UTF-8 字符 std::string potential_chunk = text.substr(offset, raw_chunk_size); size_t valid_utf8_length = validate_utf8(potential_chunk); // 调整为有效的 UTF-8 字符边界 size_t chunk_size = valid_utf8_length; // 如果不是在文本的结尾，并且我们没有因为 UTF-8 截断而减小块大小， // 尝试在空格、换行或标点处分割，以获得更自然的分隔点 if (offset + chunk_size < text_length && chunk_size == raw_chunk_size) { size_t break_pos = offset + chunk_size; // 向后寻找一个合适的分割点 size_t min_pos = offset + valid_utf8_length / 2; // 不要搜索太远，至少保留一半的有效内容 while (break_pos > min_pos && text[break_pos] != ' ' && text[break_pos] != '\n' && text[break_pos] != '.' && text[break_pos] != ',' && text[break_pos] != ';' && text[break_pos] != ':' && text[break_pos] != '!' && text[break_pos] != '?') { break_pos--; } // 如果找到了合适的分割点且不是原始位置 if (break_pos > min_pos) { // 向前移动到分隔符后面的位置 break_pos++; // 检查新的分割点是否会导致 UTF-8 截断 std::string new_chunk = text.substr(offset, break_pos - offset); size_t new_valid_length = validate_utf8(new_chunk); if (new_valid_length == new_chunk.size()) { // 只有在不会截断 UTF-8 字符的情况下使用新的分割点 chunk_size = break_pos - offset; } } } // 创建一个文本块 json chunk; chunk["type"] = "text"; chunk["text"] = text.substr(offset, chunk_size); chunks.push_back(chunk); offset += chunk_size; } return chunks; } // 处理写入操作 ToolResult handle_write(const json& args) { int max_chunk_size = args.value("max_chunk_size", 4000); if (!args.contains("content") || !args["content"].is_array()) { return ToolError("`content` is required and must be an array"); } std::vector processed_content; // 处理内容，分割大型文本 for (const auto& item : args["content"]) { if (!item.contains("type")) { return ToolError("Each content item must have a `type` field"); } std::string type = item["type"]; if (type == "text") { if (!item.contains("text") || !item["text"].is_string()) { return ToolError("Text items must have a `text` field with string value"); } std::string text = item["text"]; auto chunks = split_text_into_chunks(text, max_chunk_size); processed_content.insert(processed_content.end(), chunks.begin(), chunks.end()); } else if (type == "image") { if (!item.contains("image_url") || !item["image_url"].is_object() || !item["image_url"].contains("url") || !item["image_url"]["url"].is_string()) { return ToolError("Image items must have an `image_url` field with a `url` property"); } // 图像保持为一个整体 processed_content.push_back(item); } else { return ToolError("Unsupported content type: " + type); } } // 生成一个唯一的存储ID std::string store_id = "content_" + std::to_string(current_id_); current_id_ = (current_id_ + 1) % MAX_STORE_ID; // 存储处理后的内容 content_store_[store_id] = processed_content; // 返回存储ID和内容项数 json result; result["store_id"] = store_id; result["total_items"] = processed_content.size(); return ToolResult(result); } // 处理读取操作 ToolResult handle_read(const json& args) { if (!args.contains("cursor") || !args["cursor"].is_string()) { return ToolError("`cursor` is required for read operations"); } std::string cursor = args["cursor"]; if (cursor == "start") { // 列出所有可用的存储ID json available_stores = json::array(); for (const auto& [id, content] : content_store_) { json store_info; store_info["store_id"] = id; store_info["total_items"] = content.size(); available_stores.push_back(store_info); } if (available_stores.empty()) { return ToolResult("No content available. Use `write` operation to store content first."); } json result; result["available_stores"] = available_stores; result["next_cursor"] = "select_store"; return ToolResult(result); } else if (cursor == "select_store") { // 用户需要选择一个存储ID return ToolError("Please provide a store_id as cursor in format `store_id:content_X`"); } else if (cursor.find("store_id:") == 0) { // 用户选择了一个存储ID std::string store_id = cursor.substr(9); // 移除 "store_id:" 前缀 if (content_store_.find(store_id) == content_store_.end()) { return ToolError("Store ID `" + store_id + "` not found"); } // 返回该存储的第一个内容项 json result = content_store_[store_id][0]; // 添加导航信息 if (content_store_[store_id].size() > 1) { result["next_cursor"] = store_id + ":1"; result["remaining_items"] = content_store_[store_id].size() - 1; } else { result["next_cursor"] = "end"; result["remaining_items"] = 0; } return ToolResult(result); } else if (cursor.find(":") != std::string::npos) { // 用户正在浏览特定存储内的内容 size_t delimiter_pos = cursor.find(":"); std::string store_id = cursor.substr(0, delimiter_pos); size_t index = std::stoul(cursor.substr(delimiter_pos + 1)); if (content_store_.find(store_id) == content_store_.end()) { return ToolError("Store ID `" + store_id + "` not found"); } if (index >= content_store_[store_id].size()) { return ToolError("Index out of range"); } // 返回请求的内容项 json result = content_store_[store_id][index]; // 添加导航信息 if (index + 1 < content_store_[store_id].size()) { result["next_cursor"] = store_id + ":" + std::to_string(index + 1); result["remaining_items"] = content_store_[store_id].size() - index - 1; } else { result["next_cursor"] = "end"; result["remaining_items"] = 0; } return ToolResult(result); } else if (cursor == "end") { return ToolResult("You have reached the end of the content."); } else { return ToolError("Invalid cursor format"); } } ToolResult execute(const json& args) override { try { if (!args.contains("operation")) { return ToolError("`operation` is required"); } std::string operation = args["operation"]; if (operation == "write") { return handle_write(args); } else if (operation == "read") { return handle_read(args); } else { return ToolError("Unknown operation `" + operation + "`. Please use `write` or `read`"); } } catch (const std::exception& e) { return ToolError(std::string(e.what())); } } }; } // namespace humanus #endif // HUMANUS_TOOL_CONTENT_PROVIDER_H