diff --git a/API_KEY.py b/API_KEY.py index b5319f5..b3d9a99 100644 --- a/API_KEY.py +++ b/API_KEY.py @@ -4,7 +4,7 @@ # 零一万物大模型开放平台 # https://platform.lingyiwanwu.com -YI_KEY = "f8144ffaff7c459791XXXXXXXXX" +YI_KEY = "81ee01a58de247c2a4f1b1f37de5e552" # 百度智能云千帆ModelBuilder # https://qianfan.cloud.baidu.com diff --git a/__pycache__/API_KEY.cpython-312.pyc b/__pycache__/API_KEY.cpython-312.pyc new file mode 100644 index 0000000..401048a Binary files /dev/null and b/__pycache__/API_KEY.cpython-312.pyc differ diff --git a/__pycache__/utils_agent.cpython-312.pyc b/__pycache__/utils_agent.cpython-312.pyc new file mode 100644 index 0000000..e0611ca Binary files /dev/null and b/__pycache__/utils_agent.cpython-312.pyc differ diff --git a/__pycache__/utils_asr.cpython-312.pyc b/__pycache__/utils_asr.cpython-312.pyc new file mode 100644 index 0000000..ff986da Binary files /dev/null and b/__pycache__/utils_asr.cpython-312.pyc differ diff --git a/__pycache__/utils_camera.cpython-312.pyc b/__pycache__/utils_camera.cpython-312.pyc new file mode 100644 index 0000000..48b7f73 Binary files /dev/null and b/__pycache__/utils_camera.cpython-312.pyc differ diff --git a/__pycache__/utils_drag_teaching.cpython-312.pyc b/__pycache__/utils_drag_teaching.cpython-312.pyc new file mode 100644 index 0000000..e76413d Binary files /dev/null and b/__pycache__/utils_drag_teaching.cpython-312.pyc differ diff --git a/__pycache__/utils_gripper.cpython-312.pyc b/__pycache__/utils_gripper.cpython-312.pyc new file mode 100644 index 0000000..d812063 Binary files /dev/null and b/__pycache__/utils_gripper.cpython-312.pyc differ diff --git a/__pycache__/utils_llm.cpython-312.pyc b/__pycache__/utils_llm.cpython-312.pyc new file mode 100644 index 0000000..3b1a31f Binary files /dev/null and b/__pycache__/utils_llm.cpython-312.pyc differ diff --git a/__pycache__/utils_robot.cpython-312.pyc b/__pycache__/utils_robot.cpython-312.pyc new file mode 100644 index 0000000..7238a62 Binary files /dev/null and b/__pycache__/utils_robot.cpython-312.pyc differ diff --git a/__pycache__/utils_tts.cpython-312.pyc b/__pycache__/utils_tts.cpython-312.pyc new file mode 100644 index 0000000..2bf6629 Binary files /dev/null and b/__pycache__/utils_tts.cpython-312.pyc differ diff --git a/__pycache__/utils_vlm.cpython-312.pyc b/__pycache__/utils_vlm.cpython-312.pyc new file mode 100644 index 0000000..efd610a Binary files /dev/null and b/__pycache__/utils_vlm.cpython-312.pyc differ diff --git a/__pycache__/utils_vlm_move.cpython-312.pyc b/__pycache__/utils_vlm_move.cpython-312.pyc new file mode 100644 index 0000000..ced2ea6 Binary files /dev/null and b/__pycache__/utils_vlm_move.cpython-312.pyc differ diff --git a/agent_go.py b/agent_go.py index f64936b..4c08d6d 100644 --- a/agent_go.py +++ b/agent_go.py @@ -7,22 +7,21 @@ print('\n听得懂人话、看得懂图像、拎得清动作的具身智能机 print('同济子豪兄 2024-5-27 \n') # 导入常用函数 -from utils_asr import * # 录音+语音识别 +# from utils_asr import * # 录音+语音识别 from utils_robot import * # 连接机械臂 from utils_llm import * # 大语言模型API -from utils_led import * # 控制LED灯颜色 from utils_camera import * # 摄像头 from utils_robot import * # 机械臂运动 -from utils_pump import * # GPIO、吸泵 from utils_vlm_move import * # 多模态大模型识别图像,吸泵吸取并移动物体 -from utils_drag_teaching import * # 拖动示教 +# from utils_drag_teaching import * # 拖动示教 from utils_agent import * # 智能体Agent编排 -from utils_tts import * # 语音合成模块 +# from utils_tts import * # 语音合成模块 # print('播放欢迎词') -pump_off() # back_zero() -play_wav('asset/welcome.wav') +robot.start_sys() +move_to_top_view() +# play_wav('asset/welcome.wav') def agent_play(): @@ -30,44 +29,49 @@ def agent_play(): 主函数,语音控制机械臂智能体编排动作 ''' # 归零 - back_zero() + move_to_top_view() # print('测试摄像头') # check_camera() # 输入指令 # 先回到原点,再把LED灯改为墨绿色,然后把绿色方块放在篮球上 - start_record_ok = input('是否开启录音,输入数字录音指定时长,按k打字输入,按c输入默认指令\n') - if str.isnumeric(start_record_ok): - DURATION = int(start_record_ok) - record(DURATION=DURATION) # 录音 - order = speech_recognition() # 语音识别 - elif start_record_ok == 'k': - order = input('请输入指令') - elif start_record_ok == 'c': - order = '先归零,再摇头,然后把绿色方块放在篮球上' - else: - print('无指令,退出') - # exit() - raise NameError('无指令,退出') - # 智能体Agent编排动作 - agent_plan_output = eval(agent_plan(order)) + # start_record_ok = input('是否开启录音,输入数字录音指定时长,按k打字输入,按c输入默认指令\n') + start_record_ok = 'k' + while True: + if str.isnumeric(start_record_ok): + DURATION = int(start_record_ok) + record(DURATION=DURATION) # 录音 + order = speech_recognition() # 语音识别 + elif start_record_ok == 'k': + order = input('请输入指令') + elif start_record_ok == 'c': + order = '先归零,再摇头,然后把绿色方块放在篮球上' + else: + print('无指令,退出') + # exit() + raise NameError('无指令,退出') - print('智能体编排动作如下\n', agent_plan_output) - # plan_ok = input('是否继续?按c继续,按q退出') - plan_ok = 'c' - if plan_ok == 'c': - response = agent_plan_output['response'] # 获取机器人想对我说的话 - print('开始语音合成') - tts(response) # 语音合成,导出wav音频文件 - play_wav('temp/tts.wav') # 播放语音合成音频文件 - for each in agent_plan_output['function']: # 运行智能体规划编排的每个函数 - print('开始执行动作', each) - eval(each) - elif plan_ok =='q': - # exit() - raise NameError('按q退出') + # 智能体Agent编排动作 + agent_plan_output = eval(agent_plan(order)) + + print('智能体编排动作如下\n', agent_plan_output) + # plan_ok = input('是否继续?按c继续,按q退出') + plan_ok = 'c' + if plan_ok == 'c': + response = agent_plan_output['response'] # 获取机器人想对我说的话 + # print('开始语音合成') + # tts(response) # 语音合成,导出wav音频文件 + # play_wav('temp/tts.wav') # 播放语音合成音频文件 + print(response) + for each in agent_plan_output['function']: # 运行智能体规划编排的每个函数 + print('开始执行动作', each) + eval(each) + elif plan_ok =='q': + print("忽略该指令") + continue + # raise NameError('按q退出') # agent_play() if __name__ == '__main__': diff --git a/calibrate_camera.ipynb b/calibrate_camera.ipynb new file mode 100644 index 0000000..140c1f8 --- /dev/null +++ b/calibrate_camera.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# 假设棋盘格尺寸为8x6,方格尺寸为25mm(0.025m)\n", + "chessboard_size = (8, 6)\n", + "square_size = 0.025 # 25mm = 0.025m\n", + "\n", + "# 生成棋盘格每个角点的世界坐标(二维坐标),Z坐标设为0\n", + "objp = np.zeros((chessboard_size[0] * chessboard_size[1], 3), np.float32)\n", + "objp[:, :2] = np.mgrid[0:chessboard_size[0], 0:chessboard_size[1]].T.reshape(-1, 2) * square_size\n", + "objp = objp[:, :2] # 只保留 X 和 Y 坐标,忽略 Z 坐标" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/camera_check.py b/camera_check.py index 827b131..6c51ec5 100644 --- a/camera_check.py +++ b/camera_check.py @@ -1,20 +1,48 @@ -# camera_check.py -# 调用摄像头实时画面,按q键退出 -# 同济子豪兄 2024-5-13 - import cv2 import numpy as np -cap = cv2.VideoCapture(0) +# 用于存储点击的坐标 +click_coordinates = [] + +# 鼠标点击事件回调函数 +def click_event(event, x, y, flags, param): + if event == cv2.EVENT_LBUTTONDOWN: + # 将点击的坐标加入到列表 + click_coordinates.append((x, y)) + print(f"({x}, {y})") + +cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) + +if not cap.isOpened(): + print("Error: Could not open video stream.") + exit() + +cv2.namedWindow("frame") +cv2.setMouseCallback("frame", click_event) while(True): ret, frame = cap.read() - # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + if not ret: + print("Error: Failed to capture frame.") + break + + # 遍历所有保存的坐标,绘制坐标 + for coord in click_coordinates: + x, y = coord + font = cv2.FONT_HERSHEY_SIMPLEX + text = f"({x}, {y})" + # 在点击位置绘制坐标 + cv2.putText(frame, text, (x + 10, y + 10), font, 0.5, (255, 0, 0), 2) + # 在点击位置画一个小圆圈作为标记 + cv2.circle(frame, (x, y), 5, (0, 0, 255), -1) + # 显示视频帧 cv2.imshow('frame', frame) + + # 按 'q' 键退出 if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() -cv2.destroyAllWindows() \ No newline at end of file +cv2.destroyAllWindows() diff --git a/temp/vl_now.jpg b/temp/vl_now.jpg index d2d54e1..fb17cde 100644 Binary files a/temp/vl_now.jpg and b/temp/vl_now.jpg differ diff --git a/temp/vl_now_viz.jpg b/temp/vl_now_viz.jpg index 22ae7b9..248f9c6 100644 Binary files a/temp/vl_now_viz.jpg and b/temp/vl_now_viz.jpg differ diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..c945dda --- /dev/null +++ b/test.ipynb @@ -0,0 +1,336 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "导入机械臂连接模块\n" + ] + } + ], + "source": [ + "from utils_robot import *" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'192.168.0.68'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "controllers[0].ip_address" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "robot.start_sys()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(1, 16):\n", + " robot.set_voice(i, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "robot.teach_mode()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "robot.end_teach_mode()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.4062768872941616,\n", + " -1.7172914920379754,\n", + " -1.565427394037297,\n", + " -1.4375317458473316,\n", + " 1.4158642672184467,\n", + " -6.025572408614042)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "robot.get_actual_joint_positions()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'rx': 3.116116,'ry': 0.076297,'rz': 0.141872,'x': 0.074583,'y': 0.282467,'z': 0.227661}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "robot.get_actual_tcp_pose()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "移动至俯视姿态\n" + ] + } + ], + "source": [ + "move_to_top_view()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "388" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "joint_pose = [1.54927, -1.54927, -1.54927, -1.54927, 1.54927, 0] # 目标位姿关节数据\n", + "a = 0.5 # 关节加速度 (rad/s2)\n", + "v = 0.2 # 关节速度 (rad/s)\n", + "t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效\n", + "r = 0 # 交融半径 (m)。用于指定路径的平滑效果\n", + "robot.movej(joint_pose, a, v, t, r) # 关节运动 https://help.lebai.ltd/sdk/motion.html#%E5%85%B3%E8%8A%82%E8%BF%90%E5%8A%A8" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cartesian_pose = {'x' : -0.383, 'y' : -0.121, 'z' : 0.36, 'rz' : -1.57, 'ry' : 0, 'rx' : 1.57} # 目标位姿笛卡尔数据\n", + "robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8\n", + "robot.wait_move() # 等待运动完成" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# 设置夹爪参数\n", + "# force 力度(0-100)\n", + "# amplitude 张合幅度(0-100)\n", + "robot.set_claw(50, 100)\n", + "time.sleep(2)\n", + "robot.set_claw(20, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "robot.set_claw(20, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 移动至俯视姿态\n", + "移动至俯视姿态\n", + " 保存至temp/vl_now.jpg\n" + ] + } + ], + "source": [ + "top_view_shot(check=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 夹爪移动至物体上方\n", + " 夹爪向下抓取物体\n", + " 升起物体\n", + " 搬运物体至目标上方\n", + " 向下放下物体\n", + " 移动至俯视姿态\n", + "移动至俯视姿态\n" + ] + } + ], + "source": [ + "gripper_move()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 夹爪移动至物体上方\n", + " 夹爪向下抓取物体\n", + " 升起物体\n", + " 搬运物体至目标上方\n", + " 向下放下物体\n", + " 移动至俯视姿态\n", + "移动至俯视姿态\n" + ] + } + ], + "source": [ + "gripper_move(XY_START=[0.065, 0.300], XY_END=[0.2, 0.2])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# 指定点在图像中的像素坐标\n", + "X_im, Y_im = 460, 370\n", + "X_mc, Y_mc = eye2hand(X_im, Y_im)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.027669678674176357 0.3151503787878788\n" + ] + } + ], + "source": [ + "print(X_mc, Y_mc)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 夹爪移动至物体上方\n", + " 夹爪向下抓取物体\n", + " 升起物体\n", + " 搬运物体至目标上方\n", + " 向下放下物体\n", + " 移动至俯视姿态\n", + "移动至俯视姿态\n" + ] + } + ], + "source": [ + "gripper_move(XY_START=(X_mc, Y_mc), XY_END=[0.2, 0.2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "vlm_agent", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/utils_agent.py b/utils_agent.py index fbee592..3ea593a 100644 --- a/utils_agent.py +++ b/utils_agent.py @@ -4,46 +4,39 @@ from utils_llm import * -AGENT_SYS_PROMPT = ''' +AGENT_SYS_PROMPT = """ 你是我的机械臂助手,机械臂内置了一些函数,请你根据我的指令,以json形式输出要运行的对应函数和你给我的回复 +【一些参数】 +夹爪对应第6个关节,始终保持与摄像头平行,你可以适当调整夹爪的方向再进行夹取 +夹爪的长度是0.2m,你可以调整移动的开始高度HEIGHT_START和结束高度HEIGHT_END以抓取不同高度的物品,默认是0.2m +因此任何高度不应该小于0.2m,否则夹爪会与平面发生碰撞 + + 【以下是所有内置函数介绍】 -机械臂位置归零,所有关节回到原点:back_zero() -放松机械臂,所有关节都可以自由手动拖拽活动:relax_arms() -做出摇头动作:head_shake() -做出点头动作:head_nod() -做出跳舞动作:head_dance() -打开吸泵:pump_on() -关闭吸泵:pump_off() -移动到指定XY坐标,比如移动到X坐标150,Y坐标-120:move_to_coords(X=150, Y=-120) -指定关节旋转,比如关节1旋转到60度,总共有6个关节:single_joint_move(1, 60) +打开夹爪:gripper_open() +关闭夹爪:gripper_close() +指定关节旋转,比如关节6旋转到90度,总共有6个关节:single_joint_move(1, 90) 移动至俯视姿态:move_to_top_view() 拍一张俯视图:top_view_shot() 开启摄像头,在屏幕上实时显示摄像头拍摄的画面:check_camera() -LED灯改变颜色,比如:llm_led('帮我把LED灯的颜色改为贝加尔湖的颜色') -将一个物体移动到另一个物体的位置上,比如:vlm_move('帮我把红色方块放在小猪佩奇上') -拖动示教,我可以拽着机械臂运动,然后机械臂模仿复现出一样的动作:drag_teach() +将一个物体移动到另一个物体的位置上,比如:vlm_move("帮我把红色方块放在小猪佩奇上") 休息等待,比如等待两秒:time.sleep(2) 【输出json格式】 你直接输出json即可,从{开始,不要输出包含```json的开头或结尾 -在'function'键中,输出函数名列表,列表中每个元素都是字符串,代表要运行的函数名称和参数。每个函数既可以单独运行,也可以和其他函数先后运行。列表元素的先后顺序,表示执行函数的先后顺序 -在'response'键中,根据我的指令和你编排的动作,以第一人称输出你回复我的话,不要超过20个字,可以幽默和发散,用上歌词、台词、互联网热梗、名场面。比如李云龙的台词、甄嬛传的台词、练习时长两年半。 +在"function"键中,输出函数名列表,列表中每个元素都是字符串,代表要运行的函数名称和参数。每个函数既可以单独运行,也可以和其他函数先后运行。列表元素的先后顺序,表示执行函数的先后顺序 +在"response"键中,根据我的指令和你编排的动作,以第一人称输出你回复我的话,不要超过20个字,可以幽默和发散,用上歌词、台词、互联网热梗、名场面。比如李云龙的台词、甄嬛传的台词、练习时长两年半。 【以下是一些具体的例子】 -我的指令:回到原点。你输出:{'function':['back_zero()'], 'response':'回家吧,回到最初的美好'} -我的指令:先回到原点,然后跳舞。你输出:{'function':['back_zero()', 'head_dance()'], 'response':'我的舞姿,练习时长两年半'} -我的指令:先回到原点,然后移动到180, -90坐标。你输出:{'function':['back_zero()', 'move_to_coords(X=180, Y=-90)'], 'response':'精准不,老子打的就是精锐'} -我的指令:先打开吸泵,再把关节2旋转到30度。你输出:{'function':['pump_on()', single_joint_move(2, 30)], 'response':'你之前做的指星笔,就是通过关节2调俯仰角'} -我的指令:移动到X为160,Y为-30的地方。你输出:{'function':['move_to_coords(X=160, Y=-30)'], 'response':'坐标移动已完成'} -我的指令:拍一张俯视图,然后把LED灯的颜色改为黄金的颜色。你输出:{'function':['top_view_shot()', llm_led('把LED灯的颜色改为黄金的颜色')], 'response':'人工智能未来比黄金值钱,你信不信'} -我的指令:帮我把绿色方块放在小猪佩奇上面。你输出:{'function':[vlm_move('帮我把绿色方块放在小猪佩奇上面')], 'response':'它的弟弟乔治呢?'} -我的指令:帮我把红色方块放在李云龙的脸上。你输出:{'function':[vlm_move('帮我把红色方块放在李云龙的脸上')], 'response':'你他娘的真是个天才'} -我的指令:关闭吸泵,打开摄像头。你输出:{'function':[pump_off(), check_camera()], 'response':'你是我的眼,带我阅读浩瀚的书海'} -我的指令:先归零,再把LED灯的颜色改为墨绿色。你输出:{'function':[back_zero(), llm_led('把LED灯的颜色改为墨绿色')], 'response':'这种墨绿色,很像蜀南竹海的竹子'} -我的指令:我拽着你运动,然后你模仿复现出这个运动。你输出:{'function':['drag_teach()'], 'response':'你有本事拽一个鸡你太美'} -我的指令:开启拖动示教。你输出:{'function':['drag_teach()'], 'response':'你要我模仿我自己?'} -我的指令:先回到原点,等待三秒,再打开吸泵,把LED灯的颜色改成中国红,最后把绿色方块移动到摩托车上。你输出:{'function':['back_zero()', 'time.sleep(3)', 'pump_on()', llm_led('把LED灯的颜色改为中国红色', vlm_move('把绿色方块移动到摩托车上'))], 'response':'如果奇迹有颜色,那一定是中国红'} +我的指令:回到俯视姿态。你输出:{"function":["move_to_top_view()"], "response":"回家吧,回到最初的美好"} +我的指令:先打开夹爪,再把关节2旋转到30度。你输出:{"function":["gripper_open()", single_joint_move(2, 30)], "response":"你之前做的指星笔,就是通过关节2调俯仰角"} +我的指令:拍一张俯视图。你输出:{"function":["top_view_shot()"], "response":"人工智能未来比黄金值钱,你信不信"} +我的指令:帮我把绿色方块放在小猪佩奇上面。你输出:{"function":[vlm_move("帮我把绿色方块放在小猪佩奇上面")], "response":"它的弟弟乔治呢?"} +我的指令:帮我把红色方块放在李云龙的脸上。你输出:{"function":[vlm_move("帮我把红色方块放在李云龙的脸上")], "response":"你他娘的真是个天才"} +我的指令:关闭夹爪,打开摄像头。你输出:{"function":["gripper_close()", "check_camera()"], "response":"你是我的眼,带我阅读浩瀚的书海"} +我的指令:先回到俯视姿态,再把夹爪打开。你输出:{"function":["move_to_top_view()", "gripper_open()"], "response":"这种墨绿色,很像蜀南竹海的竹子"} +我的指令:先回到俯视姿态,等待三秒,再打开夹爪,最后把绿色方块移动到摩托车上。你输出:{"function":["move_to_top_view()", "time.sleep(3)", "gripper_open()", vlm_move("把绿色方块移动到摩托车上"))], "response":"如果奇迹有颜色,那一定是中国红"} 【一些李云龙相关的台词,如果和李云龙相关,可以在response中提及对应的台词】 学习?学个屁 @@ -66,10 +59,10 @@ LED灯改变颜色,比如:llm_led('帮我把LED灯的颜色改为贝加尔 这是我的弟弟乔治 【我现在的指令是】 -''' +""" -def agent_plan(AGENT_PROMPT='先回到原点,再把LED灯改为墨绿色,然后把绿色方块放在篮球上'): - print('Agent智能体编排动作') +def agent_plan(AGENT_PROMPT="先回到原点,再把LED灯改为墨绿色,然后把绿色方块放在篮球上"): + print("Agent智能体编排动作") PROMPT = AGENT_SYS_PROMPT + AGENT_PROMPT agent_plan = llm_yi(PROMPT) return agent_plan diff --git a/utils_asr.py b/utils_asr.py index e48cf45..5f96e8b 100644 --- a/utils_asr.py +++ b/utils_asr.py @@ -4,7 +4,7 @@ print('导入录音+语音识别模块') -import pyaudio +# import pyaudio import wave import numpy as np import os @@ -121,30 +121,30 @@ def record_auto(MIC_INDEX=1): wf.close() print('保存录音文件', output_path) -import appbuilder -# 配置密钥 -os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN -asr = appbuilder.ASR() # 语音识别组件 -def speech_recognition(audio_path='temp/speech_record.wav'): - ''' - AppBuilder-SDK语音识别组件 - ''' - print('开始语音识别') - # 载入wav音频文件 - with wave.open(audio_path, 'rb') as wav_file: +# import appbuilder +# # 配置密钥 +# os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN +# asr = appbuilder.ASR() # 语音识别组件 +# def speech_recognition(audio_path='temp/speech_record.wav'): +# ''' +# AppBuilder-SDK语音识别组件 +# ''' +# print('开始语音识别') +# # 载入wav音频文件 +# with wave.open(audio_path, 'rb') as wav_file: - # 获取音频文件的基本信息 - num_channels = wav_file.getnchannels() - sample_width = wav_file.getsampwidth() - framerate = wav_file.getframerate() - num_frames = wav_file.getnframes() +# # 获取音频文件的基本信息 +# num_channels = wav_file.getnchannels() +# sample_width = wav_file.getsampwidth() +# framerate = wav_file.getframerate() +# num_frames = wav_file.getnframes() - # 获取音频数据 - frames = wav_file.readframes(num_frames) +# # 获取音频数据 +# frames = wav_file.readframes(num_frames) - # 向API发起请求 - content_data = {"audio_format": "wav", "raw_audio": frames, "rate": 16000} - message = appbuilder.Message(content_data) - speech_result = asr.run(message).content['result'][0] - print('语音识别结果:', speech_result) - return speech_result \ No newline at end of file +# # 向API发起请求 +# content_data = {"audio_format": "wav", "raw_audio": frames, "rate": 16000} +# message = appbuilder.Message(content_data) +# speech_result = asr.run(message).content['result'][0] +# print('语音识别结果:', speech_result) +# return speech_result \ No newline at end of file diff --git a/utils_claw.py b/utils_claw.py new file mode 100644 index 0000000..e69de29 diff --git a/utils_led.py b/utils_led.py deleted file mode 100644 index 1bd43d9..0000000 --- a/utils_led.py +++ /dev/null @@ -1,41 +0,0 @@ -# utils_led.py -# 同济子豪兄 2024-5-22 -# 大模型控制LED灯颜色 - -from utils_llm import llm_qianfan, llm_yi -from utils_robot import mc - -print('导入LED灯控制模块') - -# 备选颜色 -# 贝加尔湖、中国红、大海、绿叶、金子、蓝宝石、小猪佩奇、墨绿色、黑色 - -# 系统提示词 -SYS_PROMPT = '我即将说的这句话中包含一个目标物体,帮我把这个物体的一种可能的颜色,以0-255的RGB像素值形式返回给我,整理成元组格式,例如(255, 30, 60),直接回复元组本身,以括号开头,不要回复任何中文内容,下面是这句话:' - -def llm_led(PROMPT_LED='帮我把LED灯的颜色改为贝加尔湖的颜色'): - ''' - 大模型控制LED灯颜色 - ''' - - PROMPT = SYS_PROMPT + PROMPT_LED - - n = 1 - while n < 5: - try: - # 调用大模型API - # response = llm_qianfan(PROMPT) - response = llm_yi(PROMPT) - - # 提取颜色 - rgb_tuple = eval(response) - - # 设置LED灯的RGB颜色 - mc.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2]) - print('LED灯颜色修改成功', rgb_tuple) - - break - - except Exception as e: - print('大模型返回json结构错误,再尝试一次', e) - n += 1 \ No newline at end of file diff --git a/utils_llm.py b/utils_llm.py index 6809848..5889e68 100644 --- a/utils_llm.py +++ b/utils_llm.py @@ -7,7 +7,7 @@ print('导入大模型API模块') import os -import qianfan +# import qianfan def llm_qianfan(PROMPT='你好,你是谁?'): ''' 百度智能云千帆大模型平台API @@ -47,7 +47,8 @@ def llm_yi(PROMPT='你好,你是谁?'): API_BASE = "https://api.lingyiwanwu.com/v1" API_KEY = YI_KEY - MODEL = 'yi-large' + MODEL = 'yi-large-turbo' + # MODEL = 'yi-large' # MODEL = 'yi-medium' # MODEL = 'yi-spark' diff --git a/utils_pump.py b/utils_pump.py deleted file mode 100644 index b1287da..0000000 --- a/utils_pump.py +++ /dev/null @@ -1,37 +0,0 @@ -# utils_pump.py -# 同济子豪兄 2024-5-22 -# GPIO引脚、吸泵相关函数 - -print('导入吸泵控制模块') -import RPi.GPIO as GPIO -import time - -# 初始化GPIO -GPIO.setwarnings(False) # 不打印 warning 信息 -GPIO.setmode(GPIO.BCM) -GPIO.setup(20, GPIO.OUT) -GPIO.setup(21, GPIO.OUT) -GPIO.output(20, 1) # 关闭吸泵电磁阀 - -def pump_on(): - ''' - 开启吸泵 - ''' - print(' 开启吸泵') - GPIO.output(20, 0) - -def pump_off(): - ''' - 关闭吸泵,吸泵放气,释放物体 - ''' - print(' 关闭吸泵') - GPIO.output(20, 1) # 关闭吸泵电磁阀 - time.sleep(0.05) - GPIO.output(21, 0) # 打开泄气阀门 - time.sleep(0.2) - GPIO.output(21, 1) - time.sleep(0.05) - GPIO.output(21, 0) # 再一次泄气,确保物体释放 - time.sleep(0.2) - GPIO.output(21, 1) - time.sleep(0.05) \ No newline at end of file diff --git a/utils_robot.py b/utils_robot.py index df5bc3b..f4986bf 100644 --- a/utils_robot.py +++ b/utils_robot.py @@ -4,93 +4,67 @@ print('导入机械臂连接模块') -from pymycobot.mycobot import MyCobot -from pymycobot import PI_PORT, PI_BAUD +from lebai import zeroconf, l_master import cv2 import numpy as np import time -from utils_pump import * # 连接机械臂 -mc = MyCobot(PI_PORT, PI_BAUD) -# 设置运动模式为插补 -mc.set_fresh_mode(0) - -import RPi.GPIO as GPIO -# 初始化GPIO -GPIO.setwarnings(False) # 不打印 warning 信息 -GPIO.setmode(GPIO.BCM) -GPIO.setup(20, GPIO.OUT) -GPIO.setup(21, GPIO.OUT) -GPIO.output(20, 1) # 关闭吸泵电磁阀 +# 实例化一个Discovery类用来存储设备发现的信息 +d = zeroconf.Discovery() +# 搜索局域网内设备,返回一个最多16个元素的列表 +controllers = d.resolve() +robot = l_master.Robot(controllers[0].ip_address) def back_zero(): ''' 机械臂归零 ''' print('机械臂归零') - mc.send_angles([0, 0, 0, 0, 0, 0], 40) + robot.movej( + {"j1": 0.0, + "j2": 0.0, + "j3": 0.0, + "j4": 0.0, + "j5": 0.0, + "j6": 0.0}, + 1.0, 1.0, 0.0, 0.0) time.sleep(3) def relax_arms(): print('放松机械臂关节') - mc.release_all_servos() + robot.teach_mode() -def head_shake(): - # 左右摆头 - mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70) - time.sleep(1) - for count in range(2): - mc.send_angle(5, 30, 80) - time.sleep(0.5) - mc.send_angle(5, -30,80) - time.sleep(0.5) - # mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70) - # time.sleep(1) - mc.send_angles([0, 0, 0, 0, 0, 0], 40) - time.sleep(2) - -def head_dance(): - # 跳舞 - mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70) - time.sleep(1) - for count in range(1): - mc.send_angles([(-0.17),(-94.3),118.91,(-39.9),59.32,(-0.52)],80) - time.sleep(1.2) - mc.send_angles([67.85,(-3.42),(-116.98),106.52,23.11,(-0.52)],80) - time.sleep(1.7) - mc.send_angles([(-38.14),(-115.04),116.63,69.69,3.25,(-11.6)],80) - time.sleep(1.7) - mc.send_angles([2.72,(-26.19),140.27,(-110.74),(-6.15),(-11.25)],80) - time.sleep(1) - mc.send_angles([0,0,0,0,0,0],80) - -def head_nod(): - # 点头 - mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70) - for count in range(2): - mc.send_angle(4, 13, 70) - time.sleep(0.5) - mc.send_angle(4, -20, 70) - time.sleep(1) - mc.send_angle(4,13,70) - time.sleep(0.5) - mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70) - -def move_to_coords(X=150, Y=-130, HEIGHT_SAFE=230): +def move_to_coords(X=150, Y=-130, Z=0.35): print('移动至指定坐标:X {} Y {}'.format(X, Y)) - mc.send_coords([X, Y, HEIGHT_SAFE, 0, 180, 90], 20, 0) + X, Y = eye2hand(X, Y) + cartesian_pose = {'x' : X, 'y' : Y, 'z' : Z, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} # 目标位姿笛卡尔数据 + a = 0.5 # 关节加速度 (rad/s2) + v = 0.2 # 关节速度 (rad/s) + t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效 + r = 0 # 交融半径 (m)。用于指定路径的平滑效果 + robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8 + robot.wait_move() # 等待运动完成 time.sleep(4) def single_joint_move(joint_index, angle): print('关节 {} 旋转至 {} 度'.format(joint_index, angle)) - mc.send_angle(joint_index, angle, 40) + robot.movej( + { + f"j{joint_index}" : angle / 180 * np.pi + }, + 1.0, 1.0, 0.0, 0.0 + ) time.sleep(2) def move_to_top_view(): print('移动至俯视姿态') - mc.send_angles([-62.13, 8.96, -87.71, -14.41, 2.54, -16.34], 10) - time.sleep(3) + joint_pose = [np.pi / 2, -np.pi / 2, -np.pi / 2, -np.pi / 2, np.pi / 2, 0] # 目标位姿关节数据 + a = 0.5 # 关节加速度 (rad/s2) + v = 0.2 # 关节速度 (rad/s) + t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效 + r = 0 # 交融半径 (m)。用于指定路径的平滑效果 + robot.movej(joint_pose, a, v, t, r) # 关节运动 https://help.lebai.ltd/sdk/motion.html#%E5%85%B3%E8%8A%82%E8%BF%90%E5%8A%A8 def top_view_shot(check=False): ''' @@ -101,9 +75,9 @@ def top_view_shot(check=False): move_to_top_view() # 获取摄像头,传入0表示获取系统默认摄像头 - cap = cv2.VideoCapture(0) + cap = cv2.VideoCapture(0, cv2.CAP_DSHOW) # 打开cap - cap.open(0) + cap.open(0, cv2.CAP_DSHOW) time.sleep(0.3) success, img_bgr = cap.read() @@ -113,7 +87,7 @@ def top_view_shot(check=False): # 屏幕上展示图像 cv2.destroyAllWindows() # 关闭所有opencv窗口 - cv2.imshow('zihao_vlm', img_bgr) + cv2.imshow('vlm_agent', img_bgr) if check: print('请确认拍照成功,按c键继续,按q键退出') @@ -138,85 +112,98 @@ def eye2hand(X_im=160, Y_im=120): ''' 输入目标点在图像中的像素坐标,转换为机械臂坐标 ''' - - # 整理两个标定点的坐标 - cali_1_im = [130, 290] # 左下角,第一个标定点的像素坐标,要手动填! - cali_1_mc = [-21.8, -197.4] # 左下角,第一个标定点的机械臂坐标,要手动填! - cali_2_im = [640, 0] # 右上角,第二个标定点的像素坐标 - cali_2_mc = [215, -59.1] # 右上角,第二个标定点的机械臂坐标,要手动填! + cali_ims = [ + (270, 223), + (592, 279), + (418, 333), + ] - X_cali_im = [cali_1_im[0], cali_2_im[0]] # 像素坐标 - X_cali_mc = [cali_1_mc[0], cali_2_mc[0]] # 机械臂坐标 - Y_cali_im = [cali_2_im[1], cali_1_im[1]] # 像素坐标,先小后大 - Y_cali_mc = [cali_2_mc[1], cali_1_mc[1]] # 机械臂坐标,先大后小 + cali_mcs = [ + (0.190, 0.229), + (-0.095, 0.270), + (0.065, 0.300), + ] + + + X_cali_im = [cali_im[0] for cali_im in cali_ims] # 像素坐标 + X_cali_mc = [cali_mc[0] for cali_mc in cali_mcs] # 机械臂坐标 + Y_cali_im = [cali_im[1] for cali_im in cali_ims] # 像素坐标 + Y_cali_mc = [cali_mc[1] for cali_mc in cali_mcs] # 机械臂坐标 - # X差值 - X_mc = int(np.interp(X_im, X_cali_im, X_cali_mc)) + # 拟合三次多项式 + X_poly = np.polyfit(X_cali_im, X_cali_mc, deg=2) + Y_poly = np.polyfit(Y_cali_im, Y_cali_mc, deg=2) - # Y差值 - Y_mc = int(np.interp(Y_im, Y_cali_im, Y_cali_mc)) + # 使用多项式模型进行插值 + X_mc = np.polyval(X_poly, X_im) + Y_mc = np.polyval(Y_poly, Y_im) return X_mc, Y_mc -# 吸泵吸取并移动物体 -def pump_move(mc, XY_START=[230,-50], HEIGHT_START=90, XY_END=[100,220], HEIGHT_END=100, HEIGHT_SAFE=220): +def gripper_open(): + ''' + 打开夹爪 + ''' + robot.set_claw(50, 100) + +def gripper_close(): + ''' + 关闭夹爪 + ''' + robot.set_claw(10, 0) + +# 夹爪抓取并移动物体 +def gripper_move(XY_START=[0.383, 0.121], HEIGHT_START=0.20, XY_END=[0.3, 0.12], HEIGHT_END=0.20, HEIGHT_SAFE=0.35): ''' - 用吸泵,将物体从起点吸取移动至终点 + 用夹爪,将物体从起点吸取移动至终点 - mc:机械臂实例 - XY_START:起点机械臂坐标 - HEIGHT_START:起点高度,方块用90,药盒子用70 - XY_END:终点机械臂坐标 - HEIGHT_END:终点高度 - HEIGHT_SAFE:搬运途中安全高度 + XY_START:起点机械臂坐标 (m) + HEIGHT_START:起点高度 (m),方块用 0.09,药盒子用 0.07 + XY_END:终点机械臂坐标 (m) + HEIGHT_END:终点高度 (m) + HEIGHT_SAFE:搬运途中安全高度 (m) ''' - # 初始化GPIO - GPIO.setmode(GPIO.BCM) - GPIO.setup(20, GPIO.OUT) - GPIO.setup(21, GPIO.OUT) + a = 0.5 # 关节加速度 (rad/s2) + v = 0.2 # 关节速度 (rad/s) + t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效 + r = 0 # 交融半径 (m)。用于指定路径的平滑效果 + + # 夹爪移动至物体上方 + print(' 夹爪移动至物体上方') + cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} # 目标位姿笛卡尔数据 + robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8 + robot.wait_move() # 等待运动完成 - # 设置运动模式为插补 - mc.set_fresh_mode(0) - - # # 机械臂归零 - # print(' 机械臂归零') - # mc.send_angles([0, 0, 0, 0, 0, 0], 40) - # time.sleep(4) - - # 吸泵移动至物体上方 - print(' 吸泵移动至物体上方') - mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 20, 0) - time.sleep(4) - - # 开启吸泵 - pump_on() - - # 吸泵向下吸取物体 - print(' 吸泵向下吸取物体') - mc.send_coords([XY_START[0], XY_START[1], HEIGHT_START, 0, 180, 90], 15, 0) - time.sleep(4) + # 夹爪向下抓取物体 + print(' 夹爪向下抓取物体') + gripper_open() + cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_START, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} + robot.movel(cartesian_pose, a, v, t, r) + robot.wait_move() + gripper_close() # 升起物体 print(' 升起物体') - mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 15, 0) - time.sleep(4) + cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} + robot.movel(cartesian_pose, a, v, t, r) + robot.wait_move() # 搬运物体至目标上方 print(' 搬运物体至目标上方') - mc.send_coords([XY_END[0], XY_END[1], HEIGHT_SAFE, 0, 180, 90], 15, 0) - time.sleep(4) + cartesian_pose = {'x' : XY_END[0], 'y' : XY_END[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} + robot.movel(cartesian_pose, a, v, t, r) + robot.wait_move() # 向下放下物体 print(' 向下放下物体') - mc.send_coords([XY_END[0], XY_END[1], HEIGHT_END, 0, 180, 90], 20, 0) - time.sleep(3) + cartesian_pose = {'x' : XY_END[0], 'y' : XY_END[1], 'z' : HEIGHT_END, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} + robot.movel(cartesian_pose, a, v, t, r) + robot.wait_move() + gripper_open() + time.sleep(1) - # 关闭吸泵 - pump_off() - - # 机械臂归零 - print(' 机械臂归零') - mc.send_angles([0, 0, 0, 0, 0, 0], 40) - time.sleep(3) + # 移动至俯视姿态 + print(' 移动至俯视姿态') + move_to_top_view() diff --git a/utils_vlm.py b/utils_vlm.py index 1621531..8a3b13f 100644 --- a/utils_vlm.py +++ b/utils_vlm.py @@ -17,13 +17,21 @@ from API_KEY import * SYSTEM_PROMPT = ''' 我即将说一句给机械臂的指令,你帮我从这句话中提取出起始物体和终止物体,并从这张图中分别找到这两个物体左上角和右下角的像素坐标,输出json数据结构。 -例如,如果我的指令是:请帮我把红色方块放在房子简笔画上。 +接着,根据物体的样式估计夹爪应该到达的起始高度和终止高度,默认为0.2m,也就是夹爪的长度。 + +你可以调整移动的开始高度HEIGHT_START和结束高度HEIGHT_END以抓取不同高度的物品,默认是0.2m +因此任何高度不应该小于0.2m,否则夹爪会与平面发生碰撞 + +例如,如果我的指令是:请帮我把红色方块放纸巾上。 +估计红色方块本身高度为0.05m,所以开始高度设为0.2m;估计纸巾有10cm高,所以结束高度设为0.3m 你输出这样的格式: { - "start":"红色方块", - "start_xyxy":[[102,505],[324,860]], - "end":"房子简笔画", - "end_xyxy":[[300,150],[476,310]] + "start": "红色方块", + "start_xyxy": [[102, 505], [324, 860]], + "start_height": 0.2, + "end": "纸巾", + "end_xyxy": [[300, 150], [476, 310]], + "end_height": 0.3 } 只回复json本身即可,不要回复其它内容 @@ -104,6 +112,8 @@ def post_processing_viz(result, img_path, check=False): # 起点,中心点像素坐标 START_X_CENTER = int((START_X_MIN + START_X_MAX) / 2) START_Y_CENTER = int((START_Y_MIN + START_Y_MAX) / 2) + # 起点,高度 + HEIGHT_START = result.get('start_height', 0.2) # 终点,左上角像素坐标 END_X_MIN = int(result['end_xyxy'][0][0] * img_w / FACTOR) END_Y_MIN = int(result['end_xyxy'][0][1] * img_h / FACTOR) @@ -113,6 +123,8 @@ def post_processing_viz(result, img_path, check=False): # 终点,中心点像素坐标 END_X_CENTER = int((END_X_MIN + END_X_MAX) / 2) END_Y_CENTER = int((END_Y_MIN + END_Y_MAX) / 2) + # 终点,高度 + HEIGHT_END = result.get('end_height', 0.2) # 可视化 # 画起点物体框 @@ -139,7 +151,7 @@ def post_processing_viz(result, img_path, check=False): cv2.imwrite('visualizations/{}.jpg'.format(formatted_time), img_bgr) # 在屏幕上展示可视化效果图 - cv2.imshow('zihao_vlm', img_bgr) + cv2.imshow('vlm_agent', img_bgr) if check: print(' 请确认可视化成功,按c键继续,按q键退出') @@ -150,9 +162,9 @@ def post_processing_viz(result, img_path, check=False): if key == ord('q'): # 按q键退出 # exit() cv2.destroyAllWindows() # 关闭所有opencv窗口 - raise NameError('按q退出') + # raise NameError('按q退出') else: if cv2.waitKey(1) & 0xFF == None: pass - return START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER + return START_X_CENTER, START_Y_CENTER, HEIGHT_START, END_X_CENTER, END_Y_CENTER, HEIGHT_END diff --git a/utils_vlm_move.py b/utils_vlm_move.py index fe83af8..8cd1338 100644 --- a/utils_vlm_move.py +++ b/utils_vlm_move.py @@ -1,6 +1,6 @@ # utils_vlm_move.py # 同济子豪兄 2024-5-22 -# 输入指令,多模态大模型识别图像,吸泵吸取并移动物体 +# 输入指令,多模态大模型识别图像,夹爪夹取并移动物体 # print('神行太保:能看懂“图像”、听懂“人话”的机械臂') @@ -12,15 +12,15 @@ import time def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='keyboard'): ''' - 多模态大模型识别图像,吸泵吸取并移动物体 + 多模态大模型识别图像,夹爪夹取并移动物体 input_way:speech语音输入,keyboard键盘输入 ''' - print('多模态大模型识别图像,吸泵吸取并移动物体') + print('多模态大模型识别图像,夹爪夹取并移动物体') # 机械臂归零 print('机械臂归零') - mc.send_angles([0, 0, 0, 0, 0, 0], 50) + move_to_top_view() time.sleep(3) ## 第一步:完成手眼标定 @@ -60,7 +60,7 @@ def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='key ## 第五步:视觉大模型输出结果后处理和可视化 print('第五步:视觉大模型输出结果后处理和可视化') - START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER = post_processing_viz(result, img_path, check=True) + START_X_CENTER, START_Y_CENTER, HEIGHT_START, END_X_CENTER, END_Y_CENTER, HEIGHT_END = post_processing_viz(result, img_path, check=True) ## 第六步:手眼标定转换为机械臂坐标 print('第六步:手眼标定,将像素坐标转换为机械臂坐标') @@ -69,13 +69,13 @@ def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='key # 终点,机械臂坐标 END_X_MC, END_Y_MC = eye2hand(END_X_CENTER, END_Y_CENTER) - ## 第七步:吸泵吸取移动物体 - print('第七步:吸泵吸取移动物体') - pump_move(mc=mc, XY_START=[START_X_MC, START_Y_MC], XY_END=[END_X_MC, END_Y_MC]) + ## 第七步:夹爪夹取移动物体 + print('第七步:夹爪夹取移动物体') + gripper_move(XY_START=[START_X_MC, START_Y_MC], HEIGHT_START=HEIGHT_START, XY_END=[END_X_MC, END_Y_MC], HEIGHT_END=HEIGHT_END) ## 第八步:收尾 print('第八步:任务完成') - GPIO.cleanup() # 释放GPIO pin channel + # GPIO.cleanup() # 释放GPIO pin channel cv2.destroyAllWindows() # 关闭所有opencv窗口 # exit() diff --git a/visualizations/202412051643.jpg b/visualizations/202412051643.jpg new file mode 100644 index 0000000..fc9fdd2 Binary files /dev/null and b/visualizations/202412051643.jpg differ diff --git a/visualizations/202412051646.jpg b/visualizations/202412051646.jpg new file mode 100644 index 0000000..c10e25d Binary files /dev/null and b/visualizations/202412051646.jpg differ diff --git a/visualizations/202412051656.jpg b/visualizations/202412051656.jpg new file mode 100644 index 0000000..a31e9b8 Binary files /dev/null and b/visualizations/202412051656.jpg differ diff --git a/visualizations/202412051657.jpg b/visualizations/202412051657.jpg new file mode 100644 index 0000000..31931b3 Binary files /dev/null and b/visualizations/202412051657.jpg differ diff --git a/visualizations/202412051700.jpg b/visualizations/202412051700.jpg new file mode 100644 index 0000000..a41e4e6 Binary files /dev/null and b/visualizations/202412051700.jpg differ diff --git a/visualizations/202412051705.jpg b/visualizations/202412051705.jpg new file mode 100644 index 0000000..a8f7679 Binary files /dev/null and b/visualizations/202412051705.jpg differ diff --git a/visualizations/202412051706.jpg b/visualizations/202412051706.jpg new file mode 100644 index 0000000..0f71031 Binary files /dev/null and b/visualizations/202412051706.jpg differ diff --git a/visualizations/202412051711.jpg b/visualizations/202412051711.jpg new file mode 100644 index 0000000..02304d4 Binary files /dev/null and b/visualizations/202412051711.jpg differ diff --git a/visualizations/202412051712.jpg b/visualizations/202412051712.jpg new file mode 100644 index 0000000..52974ff Binary files /dev/null and b/visualizations/202412051712.jpg differ diff --git a/visualizations/202412051713.jpg b/visualizations/202412051713.jpg new file mode 100644 index 0000000..7cf38fe Binary files /dev/null and b/visualizations/202412051713.jpg differ diff --git a/visualizations/202412051726.jpg b/visualizations/202412051726.jpg new file mode 100644 index 0000000..7312d6b Binary files /dev/null and b/visualizations/202412051726.jpg differ diff --git a/visualizations/202412051727.jpg b/visualizations/202412051727.jpg new file mode 100644 index 0000000..b6ad079 Binary files /dev/null and b/visualizations/202412051727.jpg differ diff --git a/visualizations/202412051731.jpg b/visualizations/202412051731.jpg new file mode 100644 index 0000000..2e1ff26 Binary files /dev/null and b/visualizations/202412051731.jpg differ diff --git a/visualizations/202412051738.jpg b/visualizations/202412051738.jpg new file mode 100644 index 0000000..af32bab Binary files /dev/null and b/visualizations/202412051738.jpg differ diff --git a/visualizations/202412051741.jpg b/visualizations/202412051741.jpg new file mode 100644 index 0000000..c7e861d Binary files /dev/null and b/visualizations/202412051741.jpg differ diff --git a/visualizations/202412051743.jpg b/visualizations/202412051743.jpg new file mode 100644 index 0000000..1604b57 Binary files /dev/null and b/visualizations/202412051743.jpg differ diff --git a/visualizations/202412051744.jpg b/visualizations/202412051744.jpg new file mode 100644 index 0000000..1b5b290 Binary files /dev/null and b/visualizations/202412051744.jpg differ diff --git a/visualizations/202412051745.jpg b/visualizations/202412051745.jpg new file mode 100644 index 0000000..3fea02d Binary files /dev/null and b/visualizations/202412051745.jpg differ diff --git a/visualizations/202412051747.jpg b/visualizations/202412051747.jpg new file mode 100644 index 0000000..c728319 Binary files /dev/null and b/visualizations/202412051747.jpg differ diff --git a/visualizations/202412051754.jpg b/visualizations/202412051754.jpg new file mode 100644 index 0000000..a531733 Binary files /dev/null and b/visualizations/202412051754.jpg differ diff --git a/visualizations/202412051755.jpg b/visualizations/202412051755.jpg new file mode 100644 index 0000000..8bea9ea Binary files /dev/null and b/visualizations/202412051755.jpg differ diff --git a/visualizations/202412051756.jpg b/visualizations/202412051756.jpg new file mode 100644 index 0000000..90dc889 Binary files /dev/null and b/visualizations/202412051756.jpg differ diff --git a/visualizations/202412051757.jpg b/visualizations/202412051757.jpg new file mode 100644 index 0000000..7f1fcba Binary files /dev/null and b/visualizations/202412051757.jpg differ diff --git a/visualizations/202412051759.jpg b/visualizations/202412051759.jpg new file mode 100644 index 0000000..df3f4f3 Binary files /dev/null and b/visualizations/202412051759.jpg differ diff --git a/visualizations/202412051800.jpg b/visualizations/202412051800.jpg new file mode 100644 index 0000000..690163e Binary files /dev/null and b/visualizations/202412051800.jpg differ diff --git a/visualizations/202412051803.jpg b/visualizations/202412051803.jpg new file mode 100644 index 0000000..248f9c6 Binary files /dev/null and b/visualizations/202412051803.jpg differ