main
hkr04 2025-05-18 20:56:32 +08:00
parent 68acc666fc
commit ec49749a0e
53 changed files with 639 additions and 323 deletions

View File

@ -4,7 +4,7 @@
# 零一万物大模型开放平台
# https://platform.lingyiwanwu.com
YI_KEY = "f8144ffaff7c459791XXXXXXXXX"
YI_KEY = "81ee01a58de247c2a4f1b1f37de5e552"
# 百度智能云千帆ModelBuilder
# https://qianfan.cloud.baidu.com

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -7,22 +7,21 @@ print('\n听得懂人话、看得懂图像、拎得清动作的具身智能机
print('同济子豪兄 2024-5-27 \n')
# 导入常用函数
from utils_asr import * # 录音+语音识别
# from utils_asr import * # 录音+语音识别
from utils_robot import * # 连接机械臂
from utils_llm import * # 大语言模型API
from utils_led import * # 控制LED灯颜色
from utils_camera import * # 摄像头
from utils_robot import * # 机械臂运动
from utils_pump import * # GPIO、吸泵
from utils_vlm_move import * # 多模态大模型识别图像,吸泵吸取并移动物体
from utils_drag_teaching import * # 拖动示教
# from utils_drag_teaching import * # 拖动示教
from utils_agent import * # 智能体Agent编排
from utils_tts import * # 语音合成模块
# from utils_tts import * # 语音合成模块
# print('播放欢迎词')
pump_off()
# back_zero()
play_wav('asset/welcome.wav')
robot.start_sys()
move_to_top_view()
# play_wav('asset/welcome.wav')
def agent_play():
@ -30,44 +29,49 @@ def agent_play():
主函数语音控制机械臂智能体编排动作
'''
# 归零
back_zero()
move_to_top_view()
# print('测试摄像头')
# check_camera()
# 输入指令
# 先回到原点再把LED灯改为墨绿色然后把绿色方块放在篮球上
start_record_ok = input('是否开启录音输入数字录音指定时长按k打字输入按c输入默认指令\n')
if str.isnumeric(start_record_ok):
DURATION = int(start_record_ok)
record(DURATION=DURATION) # 录音
order = speech_recognition() # 语音识别
elif start_record_ok == 'k':
order = input('请输入指令')
elif start_record_ok == 'c':
order = '先归零,再摇头,然后把绿色方块放在篮球上'
else:
print('无指令,退出')
# exit()
raise NameError('无指令,退出')
# 智能体Agent编排动作
agent_plan_output = eval(agent_plan(order))
# start_record_ok = input('是否开启录音输入数字录音指定时长按k打字输入按c输入默认指令\n')
start_record_ok = 'k'
while True:
if str.isnumeric(start_record_ok):
DURATION = int(start_record_ok)
record(DURATION=DURATION) # 录音
order = speech_recognition() # 语音识别
elif start_record_ok == 'k':
order = input('请输入指令')
elif start_record_ok == 'c':
order = '先归零,再摇头,然后把绿色方块放在篮球上'
else:
print('无指令,退出')
# exit()
raise NameError('无指令,退出')
print('智能体编排动作如下\n', agent_plan_output)
# plan_ok = input('是否继续按c继续按q退出')
plan_ok = 'c'
if plan_ok == 'c':
response = agent_plan_output['response'] # 获取机器人想对我说的话
print('开始语音合成')
tts(response) # 语音合成导出wav音频文件
play_wav('temp/tts.wav') # 播放语音合成音频文件
for each in agent_plan_output['function']: # 运行智能体规划编排的每个函数
print('开始执行动作', each)
eval(each)
elif plan_ok =='q':
# exit()
raise NameError('按q退出')
# 智能体Agent编排动作
agent_plan_output = eval(agent_plan(order))
print('智能体编排动作如下\n', agent_plan_output)
# plan_ok = input('是否继续按c继续按q退出')
plan_ok = 'c'
if plan_ok == 'c':
response = agent_plan_output['response'] # 获取机器人想对我说的话
# print('开始语音合成')
# tts(response) # 语音合成导出wav音频文件
# play_wav('temp/tts.wav') # 播放语音合成音频文件
print(response)
for each in agent_plan_output['function']: # 运行智能体规划编排的每个函数
print('开始执行动作', each)
eval(each)
elif plan_ok =='q':
print("忽略该指令")
continue
# raise NameError('按q退出')
# agent_play()
if __name__ == '__main__':

View File

@ -0,0 +1,33 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# 假设棋盘格尺寸为8x6方格尺寸为25mm0.025m\n",
"chessboard_size = (8, 6)\n",
"square_size = 0.025 # 25mm = 0.025m\n",
"\n",
"# 生成棋盘格每个角点的世界坐标二维坐标Z坐标设为0\n",
"objp = np.zeros((chessboard_size[0] * chessboard_size[1], 3), np.float32)\n",
"objp[:, :2] = np.mgrid[0:chessboard_size[0], 0:chessboard_size[1]].T.reshape(-1, 2) * square_size\n",
"objp = objp[:, :2] # 只保留 X 和 Y 坐标,忽略 Z 坐标"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -1,20 +1,48 @@
# camera_check.py
# 调用摄像头实时画面按q键退出
# 同济子豪兄 2024-5-13
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
# 用于存储点击的坐标
click_coordinates = []
# 鼠标点击事件回调函数
def click_event(event, x, y, flags, param):
if event == cv2.EVENT_LBUTTONDOWN:
# 将点击的坐标加入到列表
click_coordinates.append((x, y))
print(f"({x}, {y})")
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
if not cap.isOpened():
print("Error: Could not open video stream.")
exit()
cv2.namedWindow("frame")
cv2.setMouseCallback("frame", click_event)
while(True):
ret, frame = cap.read()
# gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if not ret:
print("Error: Failed to capture frame.")
break
# 遍历所有保存的坐标,绘制坐标
for coord in click_coordinates:
x, y = coord
font = cv2.FONT_HERSHEY_SIMPLEX
text = f"({x}, {y})"
# 在点击位置绘制坐标
cv2.putText(frame, text, (x + 10, y + 10), font, 0.5, (255, 0, 0), 2)
# 在点击位置画一个小圆圈作为标记
cv2.circle(frame, (x, y), 5, (0, 0, 255), -1)
# 显示视频帧
cv2.imshow('frame', frame)
# 按 'q' 键退出
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
cv2.destroyAllWindows()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 114 KiB

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 117 KiB

After

Width:  |  Height:  |  Size: 101 KiB

336
test.ipynb 100644
View File

@ -0,0 +1,336 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"导入机械臂连接模块\n"
]
}
],
"source": [
"from utils_robot import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'192.168.0.68'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"controllers[0].ip_address"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"robot.start_sys()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"for i in range(1, 16):\n",
" robot.set_voice(i, 0)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"robot.teach_mode()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"robot.end_teach_mode()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1.4062768872941616,\n",
" -1.7172914920379754,\n",
" -1.565427394037297,\n",
" -1.4375317458473316,\n",
" 1.4158642672184467,\n",
" -6.025572408614042)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"robot.get_actual_joint_positions()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'rx': 3.116116,'ry': 0.076297,'rz': 0.141872,'x': 0.074583,'y': 0.282467,'z': 0.227661}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"robot.get_actual_tcp_pose()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"移动至俯视姿态\n"
]
}
],
"source": [
"move_to_top_view()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"388"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"joint_pose = [1.54927, -1.54927, -1.54927, -1.54927, 1.54927, 0] # 目标位姿关节数据\n",
"a = 0.5 # 关节加速度 (rad/s2)\n",
"v = 0.2 # 关节速度 (rad/s)\n",
"t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效\n",
"r = 0 # 交融半径 (m)。用于指定路径的平滑效果\n",
"robot.movej(joint_pose, a, v, t, r) # 关节运动 https://help.lebai.ltd/sdk/motion.html#%E5%85%B3%E8%8A%82%E8%BF%90%E5%8A%A8"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cartesian_pose = {'x' : -0.383, 'y' : -0.121, 'z' : 0.36, 'rz' : -1.57, 'ry' : 0, 'rx' : 1.57} # 目标位姿笛卡尔数据\n",
"robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8\n",
"robot.wait_move() # 等待运动完成"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# 设置夹爪参数\n",
"# force 力度(0-100)\n",
"# amplitude 张合幅度(0-100)\n",
"robot.set_claw(50, 100)\n",
"time.sleep(2)\n",
"robot.set_claw(20, 0)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"robot.set_claw(20, 0)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 移动至俯视姿态\n",
"移动至俯视姿态\n",
" 保存至temp/vl_now.jpg\n"
]
}
],
"source": [
"top_view_shot(check=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 夹爪移动至物体上方\n",
" 夹爪向下抓取物体\n",
" 升起物体\n",
" 搬运物体至目标上方\n",
" 向下放下物体\n",
" 移动至俯视姿态\n",
"移动至俯视姿态\n"
]
}
],
"source": [
"gripper_move()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 夹爪移动至物体上方\n",
" 夹爪向下抓取物体\n",
" 升起物体\n",
" 搬运物体至目标上方\n",
" 向下放下物体\n",
" 移动至俯视姿态\n",
"移动至俯视姿态\n"
]
}
],
"source": [
"gripper_move(XY_START=[0.065, 0.300], XY_END=[0.2, 0.2])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# 指定点在图像中的像素坐标\n",
"X_im, Y_im = 460, 370\n",
"X_mc, Y_mc = eye2hand(X_im, Y_im)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.027669678674176357 0.3151503787878788\n"
]
}
],
"source": [
"print(X_mc, Y_mc)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 夹爪移动至物体上方\n",
" 夹爪向下抓取物体\n",
" 升起物体\n",
" 搬运物体至目标上方\n",
" 向下放下物体\n",
" 移动至俯视姿态\n",
"移动至俯视姿态\n"
]
}
],
"source": [
"gripper_move(XY_START=(X_mc, Y_mc), XY_END=[0.2, 0.2])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "vlm_agent",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -4,46 +4,39 @@
from utils_llm import *
AGENT_SYS_PROMPT = '''
AGENT_SYS_PROMPT = """
你是我的机械臂助手机械臂内置了一些函数请你根据我的指令以json形式输出要运行的对应函数和你给我的回复
一些参数
夹爪对应第6个关节始终保持与摄像头平行你可以适当调整夹爪的方向再进行夹取
夹爪的长度是0.2m你可以调整移动的开始高度HEIGHT_START和结束高度HEIGHT_END以抓取不同高度的物品默认是0.2m
因此任何高度不应该小于0.2m否则夹爪会与平面发生碰撞
以下是所有内置函数介绍
机械臂位置归零所有关节回到原点back_zero()
放松机械臂所有关节都可以自由手动拖拽活动relax_arms()
做出摇头动作head_shake()
做出点头动作head_nod()
做出跳舞动作head_dance()
打开吸泵pump_on()
关闭吸泵pump_off()
移动到指定XY坐标比如移动到X坐标150Y坐标-120move_to_coords(X=150, Y=-120)
指定关节旋转比如关节1旋转到60度总共有6个关节single_joint_move(1, 60)
打开夹爪gripper_open()
关闭夹爪gripper_close()
指定关节旋转比如关节6旋转到90度总共有6个关节single_joint_move(1, 90)
移动至俯视姿态move_to_top_view()
拍一张俯视图top_view_shot()
开启摄像头在屏幕上实时显示摄像头拍摄的画面check_camera()
LED灯改变颜色比如llm_led('帮我把LED灯的颜色改为贝加尔湖的颜色')
将一个物体移动到另一个物体的位置上比如vlm_move('帮我把红色方块放在小猪佩奇上')
拖动示教我可以拽着机械臂运动然后机械臂模仿复现出一样的动作drag_teach()
将一个物体移动到另一个物体的位置上比如vlm_move("帮我把红色方块放在小猪佩奇上")
休息等待比如等待两秒time.sleep(2)
输出json格式
你直接输出json即可{开始不要输出包含```json的开头或结尾
'function'键中输出函数名列表列表中每个元素都是字符串代表要运行的函数名称和参数每个函数既可以单独运行也可以和其他函数先后运行列表元素的先后顺序表示执行函数的先后顺序
'response'键中根据我的指令和你编排的动作以第一人称输出你回复我的话不要超过20个字可以幽默和发散用上歌词台词互联网热梗名场面比如李云龙的台词甄嬛传的台词练习时长两年半
"function"键中输出函数名列表列表中每个元素都是字符串代表要运行的函数名称和参数每个函数既可以单独运行也可以和其他函数先后运行列表元素的先后顺序表示执行函数的先后顺序
"response"键中根据我的指令和你编排的动作以第一人称输出你回复我的话不要超过20个字可以幽默和发散用上歌词台词互联网热梗名场面比如李云龙的台词甄嬛传的台词练习时长两年半
以下是一些具体的例子
我的指令回到原点你输出{'function':['back_zero()'], 'response':'回家吧,回到最初的美好'}
我的指令先回到原点然后跳舞你输出{'function':['back_zero()', 'head_dance()'], 'response':'我的舞姿,练习时长两年半'}
我的指令先回到原点然后移动到180, -90坐标你输出{'function':['back_zero()', 'move_to_coords(X=180, Y=-90)'], 'response':'精准不,老子打的就是精锐'}
我的指令先打开吸泵再把关节2旋转到30度你输出{'function':['pump_on()', single_joint_move(2, 30)], 'response':'你之前做的指星笔就是通过关节2调俯仰角'}
我的指令移动到X为160Y为-30的地方你输出{'function':['move_to_coords(X=160, Y=-30)'], 'response':'坐标移动已完成'}
我的指令拍一张俯视图然后把LED灯的颜色改为黄金的颜色你输出{'function':['top_view_shot()', llm_led('把LED灯的颜色改为黄金的颜色')], 'response':'人工智能未来比黄金值钱,你信不信'}
我的指令帮我把绿色方块放在小猪佩奇上面你输出{'function':[vlm_move('帮我把绿色方块放在小猪佩奇上面')], 'response':'它的弟弟乔治呢?'}
我的指令帮我把红色方块放在李云龙的脸上你输出{'function':[vlm_move('帮我把红色方块放在李云龙的脸上')], 'response':'你他娘的真是个天才'}
我的指令关闭吸泵打开摄像头你输出{'function':[pump_off(), check_camera()], 'response':'你是我的眼,带我阅读浩瀚的书海'}
我的指令先归零再把LED灯的颜色改为墨绿色你输出{'function':[back_zero(), llm_led('把LED灯的颜色改为墨绿色')], 'response':'这种墨绿色,很像蜀南竹海的竹子'}
我的指令我拽着你运动然后你模仿复现出这个运动你输出{'function':['drag_teach()'], 'response':'你有本事拽一个鸡你太美'}
我的指令开启拖动示教你输出{'function':['drag_teach()'], 'response':'你要我模仿我自己?'}
我的指令先回到原点等待三秒再打开吸泵把LED灯的颜色改成中国红最后把绿色方块移动到摩托车上你输出{'function':['back_zero()', 'time.sleep(3)', 'pump_on()', llm_led('把LED灯的颜色改为中国红色', vlm_move('把绿色方块移动到摩托车上'))], 'response':'如果奇迹有颜色,那一定是中国红'}
我的指令回到俯视姿态你输出{"function":["move_to_top_view()"], "response":"回家吧,回到最初的美好"}
我的指令先打开夹爪再把关节2旋转到30度你输出{"function":["gripper_open()", single_joint_move(2, 30)], "response":"你之前做的指星笔就是通过关节2调俯仰角"}
我的指令拍一张俯视图你输出{"function":["top_view_shot()"], "response":"人工智能未来比黄金值钱,你信不信"}
我的指令帮我把绿色方块放在小猪佩奇上面你输出{"function":[vlm_move("帮我把绿色方块放在小猪佩奇上面")], "response":"它的弟弟乔治呢?"}
我的指令帮我把红色方块放在李云龙的脸上你输出{"function":[vlm_move("帮我把红色方块放在李云龙的脸上")], "response":"你他娘的真是个天才"}
我的指令关闭夹爪打开摄像头你输出{"function":["gripper_close()", "check_camera()"], "response":"你是我的眼,带我阅读浩瀚的书海"}
我的指令先回到俯视姿态再把夹爪打开你输出{"function":["move_to_top_view()", "gripper_open()"], "response":"这种墨绿色,很像蜀南竹海的竹子"}
我的指令先回到俯视姿态等待三秒再打开夹爪最后把绿色方块移动到摩托车上你输出{"function":["move_to_top_view()", "time.sleep(3)", "gripper_open()", vlm_move("把绿色方块移动到摩托车上"))], "response":"如果奇迹有颜色,那一定是中国红"}
一些李云龙相关的台词如果和李云龙相关可以在response中提及对应的台词
学习学个屁
@ -66,10 +59,10 @@ LED灯改变颜色比如llm_led('帮我把LED灯的颜色改为贝加尔
这是我的弟弟乔治
我现在的指令是
'''
"""
def agent_plan(AGENT_PROMPT='先回到原点再把LED灯改为墨绿色然后把绿色方块放在篮球上'):
print('Agent智能体编排动作')
def agent_plan(AGENT_PROMPT="先回到原点再把LED灯改为墨绿色然后把绿色方块放在篮球上"):
print("Agent智能体编排动作")
PROMPT = AGENT_SYS_PROMPT + AGENT_PROMPT
agent_plan = llm_yi(PROMPT)
return agent_plan

View File

@ -4,7 +4,7 @@
print('导入录音+语音识别模块')
import pyaudio
# import pyaudio
import wave
import numpy as np
import os
@ -121,30 +121,30 @@ def record_auto(MIC_INDEX=1):
wf.close()
print('保存录音文件', output_path)
import appbuilder
# 配置密钥
os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN
asr = appbuilder.ASR() # 语音识别组件
def speech_recognition(audio_path='temp/speech_record.wav'):
'''
AppBuilder-SDK语音识别组件
'''
print('开始语音识别')
# 载入wav音频文件
with wave.open(audio_path, 'rb') as wav_file:
# import appbuilder
# # 配置密钥
# os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN
# asr = appbuilder.ASR() # 语音识别组件
# def speech_recognition(audio_path='temp/speech_record.wav'):
# '''
# AppBuilder-SDK语音识别组件
# '''
# print('开始语音识别')
# # 载入wav音频文件
# with wave.open(audio_path, 'rb') as wav_file:
# 获取音频文件的基本信息
num_channels = wav_file.getnchannels()
sample_width = wav_file.getsampwidth()
framerate = wav_file.getframerate()
num_frames = wav_file.getnframes()
# # 获取音频文件的基本信息
# num_channels = wav_file.getnchannels()
# sample_width = wav_file.getsampwidth()
# framerate = wav_file.getframerate()
# num_frames = wav_file.getnframes()
# 获取音频数据
frames = wav_file.readframes(num_frames)
# # 获取音频数据
# frames = wav_file.readframes(num_frames)
# 向API发起请求
content_data = {"audio_format": "wav", "raw_audio": frames, "rate": 16000}
message = appbuilder.Message(content_data)
speech_result = asr.run(message).content['result'][0]
print('语音识别结果:', speech_result)
return speech_result
# # 向API发起请求
# content_data = {"audio_format": "wav", "raw_audio": frames, "rate": 16000}
# message = appbuilder.Message(content_data)
# speech_result = asr.run(message).content['result'][0]
# print('语音识别结果:', speech_result)
# return speech_result

0
utils_claw.py 100644
View File

View File

@ -1,41 +0,0 @@
# utils_led.py
# 同济子豪兄 2024-5-22
# 大模型控制LED灯颜色
from utils_llm import llm_qianfan, llm_yi
from utils_robot import mc
print('导入LED灯控制模块')
# 备选颜色
# 贝加尔湖、中国红、大海、绿叶、金子、蓝宝石、小猪佩奇、墨绿色、黑色
# 系统提示词
SYS_PROMPT = '我即将说的这句话中包含一个目标物体帮我把这个物体的一种可能的颜色以0-255的RGB像素值形式返回给我整理成元组格式例如(255, 30, 60),直接回复元组本身,以括号开头,不要回复任何中文内容,下面是这句话:'
def llm_led(PROMPT_LED='帮我把LED灯的颜色改为贝加尔湖的颜色'):
'''
大模型控制LED灯颜色
'''
PROMPT = SYS_PROMPT + PROMPT_LED
n = 1
while n < 5:
try:
# 调用大模型API
# response = llm_qianfan(PROMPT)
response = llm_yi(PROMPT)
# 提取颜色
rgb_tuple = eval(response)
# 设置LED灯的RGB颜色
mc.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2])
print('LED灯颜色修改成功', rgb_tuple)
break
except Exception as e:
print('大模型返回json结构错误再尝试一次', e)
n += 1

View File

@ -7,7 +7,7 @@ print('导入大模型API模块')
import os
import qianfan
# import qianfan
def llm_qianfan(PROMPT='你好,你是谁?'):
'''
百度智能云千帆大模型平台API
@ -47,7 +47,8 @@ def llm_yi(PROMPT='你好,你是谁?'):
API_BASE = "https://api.lingyiwanwu.com/v1"
API_KEY = YI_KEY
MODEL = 'yi-large'
MODEL = 'yi-large-turbo'
# MODEL = 'yi-large'
# MODEL = 'yi-medium'
# MODEL = 'yi-spark'

View File

@ -1,37 +0,0 @@
# utils_pump.py
# 同济子豪兄 2024-5-22
# GPIO引脚、吸泵相关函数
print('导入吸泵控制模块')
import RPi.GPIO as GPIO
import time
# 初始化GPIO
GPIO.setwarnings(False) # 不打印 warning 信息
GPIO.setmode(GPIO.BCM)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(21, GPIO.OUT)
GPIO.output(20, 1) # 关闭吸泵电磁阀
def pump_on():
'''
开启吸泵
'''
print(' 开启吸泵')
GPIO.output(20, 0)
def pump_off():
'''
关闭吸泵吸泵放气释放物体
'''
print(' 关闭吸泵')
GPIO.output(20, 1) # 关闭吸泵电磁阀
time.sleep(0.05)
GPIO.output(21, 0) # 打开泄气阀门
time.sleep(0.2)
GPIO.output(21, 1)
time.sleep(0.05)
GPIO.output(21, 0) # 再一次泄气,确保物体释放
time.sleep(0.2)
GPIO.output(21, 1)
time.sleep(0.05)

View File

@ -4,93 +4,67 @@
print('导入机械臂连接模块')
from pymycobot.mycobot import MyCobot
from pymycobot import PI_PORT, PI_BAUD
from lebai import zeroconf, l_master
import cv2
import numpy as np
import time
from utils_pump import *
# 连接机械臂
mc = MyCobot(PI_PORT, PI_BAUD)
# 设置运动模式为插补
mc.set_fresh_mode(0)
import RPi.GPIO as GPIO
# 初始化GPIO
GPIO.setwarnings(False) # 不打印 warning 信息
GPIO.setmode(GPIO.BCM)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(21, GPIO.OUT)
GPIO.output(20, 1) # 关闭吸泵电磁阀
# 实例化一个Discovery类用来存储设备发现的信息
d = zeroconf.Discovery()
# 搜索局域网内设备返回一个最多16个元素的列表
controllers = d.resolve()
robot = l_master.Robot(controllers[0].ip_address)
def back_zero():
'''
机械臂归零
'''
print('机械臂归零')
mc.send_angles([0, 0, 0, 0, 0, 0], 40)
robot.movej(
{"j1": 0.0,
"j2": 0.0,
"j3": 0.0,
"j4": 0.0,
"j5": 0.0,
"j6": 0.0},
1.0, 1.0, 0.0, 0.0)
time.sleep(3)
def relax_arms():
print('放松机械臂关节')
mc.release_all_servos()
robot.teach_mode()
def head_shake():
# 左右摆头
mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70)
time.sleep(1)
for count in range(2):
mc.send_angle(5, 30, 80)
time.sleep(0.5)
mc.send_angle(5, -30,80)
time.sleep(0.5)
# mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70)
# time.sleep(1)
mc.send_angles([0, 0, 0, 0, 0, 0], 40)
time.sleep(2)
def head_dance():
# 跳舞
mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70)
time.sleep(1)
for count in range(1):
mc.send_angles([(-0.17),(-94.3),118.91,(-39.9),59.32,(-0.52)],80)
time.sleep(1.2)
mc.send_angles([67.85,(-3.42),(-116.98),106.52,23.11,(-0.52)],80)
time.sleep(1.7)
mc.send_angles([(-38.14),(-115.04),116.63,69.69,3.25,(-11.6)],80)
time.sleep(1.7)
mc.send_angles([2.72,(-26.19),140.27,(-110.74),(-6.15),(-11.25)],80)
time.sleep(1)
mc.send_angles([0,0,0,0,0,0],80)
def head_nod():
# 点头
mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70)
for count in range(2):
mc.send_angle(4, 13, 70)
time.sleep(0.5)
mc.send_angle(4, -20, 70)
time.sleep(1)
mc.send_angle(4,13,70)
time.sleep(0.5)
mc.send_angles([0.87,(-50.44),47.28,0.35,(-0.43),(-0.26)],70)
def move_to_coords(X=150, Y=-130, HEIGHT_SAFE=230):
def move_to_coords(X=150, Y=-130, Z=0.35):
print('移动至指定坐标X {} Y {}'.format(X, Y))
mc.send_coords([X, Y, HEIGHT_SAFE, 0, 180, 90], 20, 0)
X, Y = eye2hand(X, Y)
cartesian_pose = {'x' : X, 'y' : Y, 'z' : Z, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} # 目标位姿笛卡尔数据
a = 0.5 # 关节加速度 (rad/s2)
v = 0.2 # 关节速度 (rad/s)
t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效
r = 0 # 交融半径 (m)。用于指定路径的平滑效果
robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8
robot.wait_move() # 等待运动完成
time.sleep(4)
def single_joint_move(joint_index, angle):
print('关节 {} 旋转至 {}'.format(joint_index, angle))
mc.send_angle(joint_index, angle, 40)
robot.movej(
{
f"j{joint_index}" : angle / 180 * np.pi
},
1.0, 1.0, 0.0, 0.0
)
time.sleep(2)
def move_to_top_view():
print('移动至俯视姿态')
mc.send_angles([-62.13, 8.96, -87.71, -14.41, 2.54, -16.34], 10)
time.sleep(3)
joint_pose = [np.pi / 2, -np.pi / 2, -np.pi / 2, -np.pi / 2, np.pi / 2, 0] # 目标位姿关节数据
a = 0.5 # 关节加速度 (rad/s2)
v = 0.2 # 关节速度 (rad/s)
t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效
r = 0 # 交融半径 (m)。用于指定路径的平滑效果
robot.movej(joint_pose, a, v, t, r) # 关节运动 https://help.lebai.ltd/sdk/motion.html#%E5%85%B3%E8%8A%82%E8%BF%90%E5%8A%A8
def top_view_shot(check=False):
'''
@ -101,9 +75,9 @@ def top_view_shot(check=False):
move_to_top_view()
# 获取摄像头传入0表示获取系统默认摄像头
cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
# 打开cap
cap.open(0)
cap.open(0, cv2.CAP_DSHOW)
time.sleep(0.3)
success, img_bgr = cap.read()
@ -113,7 +87,7 @@ def top_view_shot(check=False):
# 屏幕上展示图像
cv2.destroyAllWindows() # 关闭所有opencv窗口
cv2.imshow('zihao_vlm', img_bgr)
cv2.imshow('vlm_agent', img_bgr)
if check:
print('请确认拍照成功按c键继续按q键退出')
@ -138,85 +112,98 @@ def eye2hand(X_im=160, Y_im=120):
'''
输入目标点在图像中的像素坐标转换为机械臂坐标
'''
# 整理两个标定点的坐标
cali_1_im = [130, 290] # 左下角,第一个标定点的像素坐标,要手动填!
cali_1_mc = [-21.8, -197.4] # 左下角,第一个标定点的机械臂坐标,要手动填!
cali_2_im = [640, 0] # 右上角,第二个标定点的像素坐标
cali_2_mc = [215, -59.1] # 右上角,第二个标定点的机械臂坐标,要手动填!
cali_ims = [
(270, 223),
(592, 279),
(418, 333),
]
X_cali_im = [cali_1_im[0], cali_2_im[0]] # 像素坐标
X_cali_mc = [cali_1_mc[0], cali_2_mc[0]] # 机械臂坐标
Y_cali_im = [cali_2_im[1], cali_1_im[1]] # 像素坐标,先小后大
Y_cali_mc = [cali_2_mc[1], cali_1_mc[1]] # 机械臂坐标,先大后小
cali_mcs = [
(0.190, 0.229),
(-0.095, 0.270),
(0.065, 0.300),
]
X_cali_im = [cali_im[0] for cali_im in cali_ims] # 像素坐标
X_cali_mc = [cali_mc[0] for cali_mc in cali_mcs] # 机械臂坐标
Y_cali_im = [cali_im[1] for cali_im in cali_ims] # 像素坐标
Y_cali_mc = [cali_mc[1] for cali_mc in cali_mcs] # 机械臂坐标
# X差值
X_mc = int(np.interp(X_im, X_cali_im, X_cali_mc))
# 拟合三次多项式
X_poly = np.polyfit(X_cali_im, X_cali_mc, deg=2)
Y_poly = np.polyfit(Y_cali_im, Y_cali_mc, deg=2)
# Y差值
Y_mc = int(np.interp(Y_im, Y_cali_im, Y_cali_mc))
# 使用多项式模型进行插值
X_mc = np.polyval(X_poly, X_im)
Y_mc = np.polyval(Y_poly, Y_im)
return X_mc, Y_mc
# 吸泵吸取并移动物体
def pump_move(mc, XY_START=[230,-50], HEIGHT_START=90, XY_END=[100,220], HEIGHT_END=100, HEIGHT_SAFE=220):
def gripper_open():
'''
打开夹爪
'''
robot.set_claw(50, 100)
def gripper_close():
'''
关闭夹爪
'''
robot.set_claw(10, 0)
# 夹爪抓取并移动物体
def gripper_move(XY_START=[0.383, 0.121], HEIGHT_START=0.20, XY_END=[0.3, 0.12], HEIGHT_END=0.20, HEIGHT_SAFE=0.35):
'''
用吸泵将物体从起点吸取移动至终点
夹爪将物体从起点吸取移动至终点
mc机械臂实例
XY_START起点机械臂坐标
HEIGHT_START起点高度方块用90药盒子用70
XY_END终点机械臂坐标
HEIGHT_END终点高度
HEIGHT_SAFE搬运途中安全高度
XY_START起点机械臂坐标 (m)
HEIGHT_START起点高度 (m)方块用 0.09药盒子用 0.07
XY_END终点机械臂坐标 (m)
HEIGHT_END终点高度 (m)
HEIGHT_SAFE搬运途中安全高度 (m)
'''
# 初始化GPIO
GPIO.setmode(GPIO.BCM)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(21, GPIO.OUT)
a = 0.5 # 关节加速度 (rad/s2)
v = 0.2 # 关节速度 (rad/s)
t = 0 # 运动时间 (s)。 当 t > 0 时,参数速度 v 和加速度 a 无效
r = 0 # 交融半径 (m)。用于指定路径的平滑效果
# 夹爪移动至物体上方
print(' 夹爪移动至物体上方')
cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi} # 目标位姿笛卡尔数据
robot.movel(cartesian_pose, a, v, t, r) # 直线运动 https://help.lebai.ltd/sdk/motion.html#%E7%9B%B4%E7%BA%BF%E8%BF%90%E5%8A%A8
robot.wait_move() # 等待运动完成
# 设置运动模式为插补
mc.set_fresh_mode(0)
# # 机械臂归零
# print(' 机械臂归零')
# mc.send_angles([0, 0, 0, 0, 0, 0], 40)
# time.sleep(4)
# 吸泵移动至物体上方
print(' 吸泵移动至物体上方')
mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 20, 0)
time.sleep(4)
# 开启吸泵
pump_on()
# 吸泵向下吸取物体
print(' 吸泵向下吸取物体')
mc.send_coords([XY_START[0], XY_START[1], HEIGHT_START, 0, 180, 90], 15, 0)
time.sleep(4)
# 夹爪向下抓取物体
print(' 夹爪向下抓取物体')
gripper_open()
cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_START, 'rz' : 0, 'ry' : 0, 'rx' : np.pi}
robot.movel(cartesian_pose, a, v, t, r)
robot.wait_move()
gripper_close()
# 升起物体
print(' 升起物体')
mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 15, 0)
time.sleep(4)
cartesian_pose = {'x' : XY_START[0], 'y' : XY_START[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi}
robot.movel(cartesian_pose, a, v, t, r)
robot.wait_move()
# 搬运物体至目标上方
print(' 搬运物体至目标上方')
mc.send_coords([XY_END[0], XY_END[1], HEIGHT_SAFE, 0, 180, 90], 15, 0)
time.sleep(4)
cartesian_pose = {'x' : XY_END[0], 'y' : XY_END[1], 'z' : HEIGHT_SAFE, 'rz' : 0, 'ry' : 0, 'rx' : np.pi}
robot.movel(cartesian_pose, a, v, t, r)
robot.wait_move()
# 向下放下物体
print(' 向下放下物体')
mc.send_coords([XY_END[0], XY_END[1], HEIGHT_END, 0, 180, 90], 20, 0)
time.sleep(3)
cartesian_pose = {'x' : XY_END[0], 'y' : XY_END[1], 'z' : HEIGHT_END, 'rz' : 0, 'ry' : 0, 'rx' : np.pi}
robot.movel(cartesian_pose, a, v, t, r)
robot.wait_move()
gripper_open()
time.sleep(1)
# 关闭吸泵
pump_off()
# 机械臂归零
print(' 机械臂归零')
mc.send_angles([0, 0, 0, 0, 0, 0], 40)
time.sleep(3)
# 移动至俯视姿态
print(' 移动至俯视姿态')
move_to_top_view()

View File

@ -17,13 +17,21 @@ from API_KEY import *
SYSTEM_PROMPT = '''
我即将说一句给机械臂的指令你帮我从这句话中提取出起始物体和终止物体并从这张图中分别找到这两个物体左上角和右下角的像素坐标输出json数据结构
例如如果我的指令是请帮我把红色方块放在房子简笔画上
接着根据物体的样式估计夹爪应该到达的起始高度和终止高度默认为0.2m也就是夹爪的长度
你可以调整移动的开始高度HEIGHT_START和结束高度HEIGHT_END以抓取不同高度的物品默认是0.2m
因此任何高度不应该小于0.2m否则夹爪会与平面发生碰撞
例如如果我的指令是请帮我把红色方块放纸巾上
估计红色方块本身高度为0.05m所以开始高度设为0.2m估计纸巾有10cm高所以结束高度设为0.3m
你输出这样的格式
{
"start":"红色方块",
"start_xyxy":[[102,505],[324,860]],
"end":"房子简笔画",
"end_xyxy":[[300,150],[476,310]]
"start": "红色方块",
"start_xyxy": [[102, 505], [324, 860]],
"start_height": 0.2,
"end": "纸巾",
"end_xyxy": [[300, 150], [476, 310]],
"end_height": 0.3
}
只回复json本身即可不要回复其它内容
@ -104,6 +112,8 @@ def post_processing_viz(result, img_path, check=False):
# 起点,中心点像素坐标
START_X_CENTER = int((START_X_MIN + START_X_MAX) / 2)
START_Y_CENTER = int((START_Y_MIN + START_Y_MAX) / 2)
# 起点,高度
HEIGHT_START = result.get('start_height', 0.2)
# 终点,左上角像素坐标
END_X_MIN = int(result['end_xyxy'][0][0] * img_w / FACTOR)
END_Y_MIN = int(result['end_xyxy'][0][1] * img_h / FACTOR)
@ -113,6 +123,8 @@ def post_processing_viz(result, img_path, check=False):
# 终点,中心点像素坐标
END_X_CENTER = int((END_X_MIN + END_X_MAX) / 2)
END_Y_CENTER = int((END_Y_MIN + END_Y_MAX) / 2)
# 终点,高度
HEIGHT_END = result.get('end_height', 0.2)
# 可视化
# 画起点物体框
@ -139,7 +151,7 @@ def post_processing_viz(result, img_path, check=False):
cv2.imwrite('visualizations/{}.jpg'.format(formatted_time), img_bgr)
# 在屏幕上展示可视化效果图
cv2.imshow('zihao_vlm', img_bgr)
cv2.imshow('vlm_agent', img_bgr)
if check:
print(' 请确认可视化成功按c键继续按q键退出')
@ -150,9 +162,9 @@ def post_processing_viz(result, img_path, check=False):
if key == ord('q'): # 按q键退出
# exit()
cv2.destroyAllWindows() # 关闭所有opencv窗口
raise NameError('按q退出')
# raise NameError('按q退出')
else:
if cv2.waitKey(1) & 0xFF == None:
pass
return START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER
return START_X_CENTER, START_Y_CENTER, HEIGHT_START, END_X_CENTER, END_Y_CENTER, HEIGHT_END

View File

@ -1,6 +1,6 @@
# utils_vlm_move.py
# 同济子豪兄 2024-5-22
# 输入指令,多模态大模型识别图像,吸泵吸取并移动物体
# 输入指令,多模态大模型识别图像,夹爪夹取并移动物体
# print('神行太保:能看懂“图像”、听懂“人话”的机械臂')
@ -12,15 +12,15 @@ import time
def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='keyboard'):
'''
多模态大模型识别图像吸泵吸取并移动物体
多模态大模型识别图像夹爪夹取并移动物体
input_wayspeech语音输入keyboard键盘输入
'''
print('多模态大模型识别图像,吸泵吸取并移动物体')
print('多模态大模型识别图像,夹爪夹取并移动物体')
# 机械臂归零
print('机械臂归零')
mc.send_angles([0, 0, 0, 0, 0, 0], 50)
move_to_top_view()
time.sleep(3)
## 第一步:完成手眼标定
@ -60,7 +60,7 @@ def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='key
## 第五步:视觉大模型输出结果后处理和可视化
print('第五步:视觉大模型输出结果后处理和可视化')
START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER = post_processing_viz(result, img_path, check=True)
START_X_CENTER, START_Y_CENTER, HEIGHT_START, END_X_CENTER, END_Y_CENTER, HEIGHT_END = post_processing_viz(result, img_path, check=True)
## 第六步:手眼标定转换为机械臂坐标
print('第六步:手眼标定,将像素坐标转换为机械臂坐标')
@ -69,13 +69,13 @@ def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='key
# 终点,机械臂坐标
END_X_MC, END_Y_MC = eye2hand(END_X_CENTER, END_Y_CENTER)
## 第七步:吸泵吸取移动物体
print('第七步:吸泵吸取移动物体')
pump_move(mc=mc, XY_START=[START_X_MC, START_Y_MC], XY_END=[END_X_MC, END_Y_MC])
## 第七步:夹爪夹取移动物体
print('第七步:夹爪夹取移动物体')
gripper_move(XY_START=[START_X_MC, START_Y_MC], HEIGHT_START=HEIGHT_START, XY_END=[END_X_MC, END_Y_MC], HEIGHT_END=HEIGHT_END)
## 第八步:收尾
print('第八步:任务完成')
GPIO.cleanup() # 释放GPIO pin channel
# GPIO.cleanup() # 释放GPIO pin channel
cv2.destroyAllWindows() # 关闭所有opencv窗口
# exit()

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB