first commit

main
hkr04 2024-12-02 11:43:41 +08:00
commit 68acc666fc
26 changed files with 2186 additions and 0 deletions

15
API_KEY.py 100644
View File

@ -0,0 +1,15 @@
# API_KEY.py
# 同济子豪兄 2024-5-22
# 各种开放平台的KEY不要外传
# 零一万物大模型开放平台
# https://platform.lingyiwanwu.com
YI_KEY = "f8144ffaff7c459791XXXXXXXXX"
# 百度智能云千帆ModelBuilder
# https://qianfan.cloud.baidu.com
QIANFAN_ACCESS_KEY = "ALTAKRELRxSXXXXXXXXXX"
QIANFAN_SECRET_KEY = "3737d9da82de4f2XXXXXXXXXX"
# 百度智能云千帆AppBuilder-SDK
APPBUILDER_TOKEN = "bce-v3/ALTAK-7jr20xkZl4cDmhbQKA4ml/f560e5dc3XXXXXXX059XXXXXXXXX"

121
README.ipynb 100644
View File

@ -0,0 +1,121 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c8a0febf-1001-4a87-b873-06bc1471187c",
"metadata": {},
"source": [
"# 语音控制智能体\n",
"\n",
"同济子豪兄 2024-5-23"
]
},
{
"cell_type": "markdown",
"id": "bb2091f1-1d00-40bc-9432-9d7cd3d9157e",
"metadata": {},
"source": [
"## 首先要做\n",
"\n",
"- 音频输出选择HDMI显示屏\n",
"\n",
"- 找到麦克风设备号\n",
"\n",
"- 手眼标定"
]
},
{
"cell_type": "markdown",
"id": "e2c144d1-059c-40d1-b69b-8485cb6686c5",
"metadata": {},
"source": [
"# 智能体Agent能够调用的函数"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1b933878-c06f-426d-8ca3-d4b5ddade0ac",
"metadata": {},
"outputs": [],
"source": [
"# 函数一:归零\n",
"# back_zero()\n",
"\n",
"# 函数二:放松机械臂\n",
"# relax_arms()\n",
"\n",
"# 函数三:摇头\n",
"# head_shake()\n",
"\n",
"# 函数四:点头\n",
"# head_nod()\n",
"\n",
"# 函数五:跳舞\n",
"# head_dance()\n",
"\n",
"# 函数六:开启吸泵\n",
"# pump_on()\n",
"\n",
"# 函数七:关闭吸泵\n",
"# pump_off()\n",
"\n",
"# 函数八:移动到指定坐标\n",
"# move_to_coords(X=150, Y=-120)\n",
"\n",
"# 函数九:指定关节旋转\n",
"# single_joint_move(1, 60)\n",
"\n",
"# 函数十:移动至俯视姿态\n",
"# move_to_top_view()\n",
"\n",
"# 函数十一:拍一张俯视图\n",
"# top_view_shot()\n",
"\n",
"# 函数十二:开启摄像头\n",
"# check_camera()\n",
"\n",
"# 函数十三LED灯变颜色\n",
"# llm_led('帮我把LED灯的颜色改为贝加尔湖的颜色')\n",
"\n",
"# 函数十四:移动物体\n",
"# vlm_move(PROMPT='帮我把红色方块放在小猪佩奇上')\n",
"\n",
"# 函数十五:拖动示教\n",
"# drag_teach()\n",
"\n",
"# 函数十六:休息等待\n",
"# time.sleep()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1d0c3be-3080-4543-a943-adb10e19e79b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

75
agent_go.py 100644
View File

@ -0,0 +1,75 @@
# agent_go.py
# 同济子豪兄 2024-5-27
# 看懂“图像”、听懂“人话”、指哪打哪的机械臂
# 机械臂+大模型+多模态+语音识别=具身智能体Agent
print('\n听得懂人话、看得懂图像、拎得清动作的具身智能机械臂!')
print('同济子豪兄 2024-5-27 \n')
# Import helper modules
from utils_asr import *            # recording + speech recognition
from utils_robot import *          # robot arm connection
from utils_llm import *            # large-language-model APIs
from utils_led import *            # LED color control
from utils_camera import *         # camera
from utils_robot import *          # NOTE(review): duplicate of the utils_robot import above — harmless but redundant
from utils_pump import *           # GPIO / suction pump
from utils_vlm_move import *       # multimodal model: locate objects, pick and move them
from utils_drag_teaching import *  # drag teaching
from utils_agent import *          # agent action orchestration
from utils_tts import *            # text-to-speech
# print('播放欢迎词')
pump_off()                         # make sure the suction pump starts in the off state
# back_zero()
play_wav('asset/welcome.wav')      # play the welcome audio clip
def agent_play():
    '''
    Main entry point: voice-controlled robot-arm agent.

    Flow: home the arm, collect an instruction (recorded speech, typed text,
    or a canned default), ask the LLM to turn it into an action plan, speak
    the plan's reply, then execute each planned function in order.
    Raises NameError when no usable instruction is given.
    '''
    # Home the arm first
    back_zero()
    # print('测试摄像头')
    # check_camera()
    # Collect the instruction, e.g.:
    # 先回到原点再把LED灯改为墨绿色然后把绿色方块放在篮球上
    start_record_ok = input('是否开启录音输入数字录音指定时长按k打字输入按c输入默认指令\n')
    if str.isnumeric(start_record_ok):
        # A number means: record for that many seconds, then transcribe.
        DURATION = int(start_record_ok)
        record(DURATION=DURATION)     # record audio
        order = speech_recognition()  # speech-to-text
    elif start_record_ok == 'k':
        # 'k': type the instruction on the keyboard.
        order = input('请输入指令')
    elif start_record_ok == 'c':
        # 'c': use the canned default instruction.
        order = '先归零,再摇头,然后把绿色方块放在篮球上'
    else:
        print('无指令,退出')
        # exit()
        raise NameError('无指令,退出')
    # Ask the agent LLM for an action plan.
    # HACK/security: eval() executes the LLM reply as Python. The prompt's
    # examples contain bare function calls, so eval is load-bearing here —
    # but it will run anything the model returns. Treat the model as trusted.
    agent_plan_output = eval(agent_plan(order))
    print('智能体编排动作如下\n', agent_plan_output)
    # plan_ok = input('是否继续按c继续按q退出')
    plan_ok = 'c'  # confirmation prompt disabled: always continue
    if plan_ok == 'c':
        response = agent_plan_output['response']  # what the robot "says" back
        print('开始语音合成')
        tts(response)                  # synthesize the reply to a wav file
        play_wav('temp/tts.wav')       # play the synthesized audio
        for each in agent_plan_output['function']:  # run each planned action in order
            print('开始执行动作', each)
            eval(each)  # security: executes model-chosen function strings — see note above
    elif plan_ok =='q':
        # exit()
        raise NameError('按q退出')

BIN
asset/SimHei.ttf 100644

Binary file not shown.

BIN
asset/welcome.wav 100644

Binary file not shown.

20
camera_check.py 100644
View File

@ -0,0 +1,20 @@
# camera_check.py
# 调用摄像头实时画面按q键退出
# 同济子豪兄 2024-5-13
import cv2
import numpy as np

# Live camera preview: show frames until the user presses 'q'.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail fast with a clear message instead of crashing later in imshow.
    raise RuntimeError('无法打开摄像头')
while True:
    ret, frame = cap.read()
    if not ret:
        # A failed read returns frame=None; the original passed it straight
        # to cv2.imshow and crashed. Stop cleanly instead.
        print('读取摄像头画面失败')
        break
    # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

16
sound_check.py 100644
View File

@ -0,0 +1,16 @@
# sound_check.py
# 快速检查语音相关的所有功能:麦克风、录音、扬声器播放声音、语音识别、语音合成
# 同济子豪兄 2024-7-15
from utils_asr import * # recording + speech recognition
from utils_tts import * # text-to-speech
# End-to-end audio smoke test: record 5 s, play it back, transcribe it,
# synthesize the transcription, and play the synthesized audio.
print('开始录音5秒')
record(DURATION=5) # record 5 seconds from the microphone
print('播放录音')
play_wav('temp/speech_record.wav')
speech_result = speech_recognition()  # transcribe the recording
print('开始语音合成')
tts(speech_result)  # synthesize the recognized text back to speech
print('播放语音合成音频')
play_wav('temp/tts.wav')

842
temp/record.txt 100644
View File

@ -0,0 +1,842 @@
[
[
2158,
2046,
1786,
1747,
1807,
1326
],
[
2158,
2046,
1786,
1744,
1808,
1326
],
[
2158,
2047,
1786,
1743,
1808,
1326
],
[
2158,
2051,
1781,
1730,
1808,
1326
],
[
2158,
2051,
1780,
1711,
1808,
1326
],
[
2158,
2061,
1780,
1683,
1808,
1326
],
[
2158,
2104,
1780,
1623,
1808,
1326
],
[
2158,
2153,
1780,
1562,
1816,
1326
],
[
2158,
2185,
1780,
1522,
1819,
1326
],
[
2158,
2221,
1780,
1492,
1820,
1326
],
[
2159,
2269,
1780,
1454,
1820,
1326
],
[
2159,
2324,
1780,
1406,
1820,
1326
],
[
2160,
2382,
1780,
1358,
1820,
1326
],
[
2160,
2442,
1782,
1309,
1819,
1326
],
[
2162,
2488,
1780,
1251,
1817,
1326
],
[
2160,
2538,
1780,
1204,
1814,
1326
],
[
2159,
2596,
1784,
1176,
1808,
1326
],
[
2163,
2638,
1780,
1128,
1814,
1324
],
[
2160,
2718,
1780,
1044,
1855,
1300
],
[
2159,
2781,
1780,
990,
1926,
1251
],
[
2162,
2821,
1782,
989,
1958,
1176
],
[
2163,
2844,
1780,
992,
1970,
1131
],
[
2163,
2874,
1783,
979,
1983,
1097
],
[
2163,
2904,
1784,
941,
2010,
1061
],
[
2163,
2912,
1785,
932,
2023,
1055
],
[
2163,
2913,
1808,
948,
2025,
1055
],
[
2163,
2936,
1901,
1038,
2025,
1055
],
[
2163,
2988,
2051,
1131,
2028,
1055
],
[
2164,
3039,
2210,
1247,
2030,
1055
],
[
2188,
3076,
2352,
1365,
2022,
1051
],
[
2229,
3100,
2468,
1467,
1990,
1030
],
[
2253,
3118,
2559,
1543,
1979,
1007
],
[
2260,
3139,
2632,
1584,
1980,
1006
],
[
2260,
3162,
2705,
1613,
1985,
1006
],
[
2273,
3189,
2786,
1658,
1990,
1006
],
[
2313,
3215,
2868,
1715,
1969,
1006
],
[
2362,
3233,
2949,
1786,
1953,
1006
],
[
2422,
3247,
3025,
1845,
1922,
1006
],
[
2473,
3265,
3095,
1868,
1876,
1006
],
[
2516,
3293,
3168,
1874,
1855,
1006
],
[
2552,
3332,
3252,
1902,
1842,
1006
],
[
2591,
3372,
3335,
1938,
1842,
1006
],
[
2635,
3397,
3389,
1969,
1912,
1006
],
[
2694,
3401,
3401,
2025,
2010,
1006
],
[
2756,
3400,
3401,
2051,
2000,
1006
],
[
2808,
3397,
3401,
2054,
2011,
1006
],
[
2855,
3396,
3401,
2039,
2044,
1006
],
[
2896,
3396,
3401,
2003,
2062,
1006
],
[
2930,
3396,
3401,
1982,
2090,
1006
],
[
2971,
3396,
3401,
1980,
2109,
1006
],
[
3004,
3395,
3401,
1994,
2110,
1006
],
[
3005,
3355,
3398,
2067,
2110,
1006
],
[
3004,
3246,
3385,
2248,
2107,
1006
],
[
3004,
3118,
3320,
2402,
2103,
1006
],
[
2995,
3026,
3209,
2386,
2103,
1006
],
[
2979,
2953,
3081,
2351,
2104,
999
],
[
2978,
2889,
2925,
2312,
2077,
999
],
[
2962,
2835,
2746,
2207,
2053,
999
],
[
2939,
2774,
2557,
2037,
2006,
999
],
[
2904,
2716,
2372,
1904,
1966,
999
],
[
2867,
2658,
2221,
1829,
1951,
999
],
[
2808,
2604,
2130,
1790,
1952,
999
],
[
2714,
2583,
2115,
1800,
2007,
999
],
[
2610,
2544,
2114,
1905,
2063,
993
],
[
2552,
2497,
2088,
2001,
2095,
894
],
[
2546,
2435,
1960,
1971,
2055,
802
],
[
2541,
2351,
1743,
1820,
2001,
802
],
[
2498,
2251,
1493,
1628,
1966,
802
],
[
2426,
2152,
1272,
1459,
1947,
802
],
[
2342,
2056,
1121,
1377,
1902,
805
],
[
2268,
1961,
1038,
1387,
1839,
829
],
[
2215,
1865,
1004,
1452,
1799,
855
],
[
2174,
1775,
984,
1531,
1756,
921
],
[
2122,
1702,
947,
1551,
1724,
999
],
[
2058,
1626,
881,
1542,
1714,
1020
],
[
2005,
1525,
783,
1516,
1717,
1023
],
[
1960,
1413,
675,
1483,
1723,
1038
],
[
1922,
1301,
576,
1469,
1719,
1089
],
[
1898,
1192,
498,
1477,
1704,
1139
],
[
1893,
1102,
438,
1493,
1681,
1146
],
[
1893,
1054,
399,
1497,
1662,
1146
],
[
1893,
1051,
393,
1482,
1657,
1146
],
[
1893,
1064,
395,
1422,
1656,
1145
],
[
1893,
1118,
398,
1343,
1647,
1145
],
[
1893,
1204,
410,
1242,
1649,
1139
],
[
1893,
1301,
480,
1214,
1661,
1127
],
[
1893,
1395,
603,
1250,
1685,
1124
],
[
1893,
1497,
755,
1317,
1717,
1111
],
[
1894,
1609,
918,
1377,
1757,
1107
],
[
1897,
1726,
1084,
1418,
1803,
1107
],
[
1897,
1840,
1252,
1464,
1850,
1107
],
[
1897,
1928,
1412,
1538,
1895,
1107
],
[
1897,
1989,
1559,
1636,
1939,
1107
],
[
1897,
2025,
1690,
1740,
1976,
1107
],
[
1898,
2050,
1799,
1839,
2003,
1107
],
[
1898,
2057,
1868,
1944,
2019,
1107
],
[
1898,
2053,
1887,
2032,
2055,
1107
],
[
1898,
2052,
1884,
2061,
2086,
1107
],
[
1898,
2056,
1883,
2126,
2135,
1107
],
[
1898,
2057,
1883,
2207,
2155,
1107
],
[
1898,
2053,
1886,
2218,
2152,
1107
],
[
1898,
2052,
1886,
2218,
2154,
1107
],
[
1898,
2050,
1887,
2175,
2147,
1107
],
[
1898,
2055,
1883,
2081,
2117,
1107
],
[
1897,
2057,
1882,
2072,
2107,
1107
]
]

Binary file not shown.

BIN
temp/tts.wav 100644

Binary file not shown.

BIN
temp/vl_now.jpg 100644

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

BIN
temp/vl_now_viz.jpg 100644

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

75
utils_agent.py 100644
View File

@ -0,0 +1,75 @@
# utils_agent.py
# 同济子豪兄 2024-5-23
# Agent智能体相关函数
from utils_llm import *
AGENT_SYS_PROMPT = '''
你是我的机械臂助手机械臂内置了一些函数请你根据我的指令以json形式输出要运行的对应函数和你给我的回复
以下是所有内置函数介绍
机械臂位置归零所有关节回到原点back_zero()
放松机械臂所有关节都可以自由手动拖拽活动relax_arms()
做出摇头动作head_shake()
做出点头动作head_nod()
做出跳舞动作head_dance()
打开吸泵pump_on()
关闭吸泵pump_off()
移动到指定XY坐标比如移动到X坐标150Y坐标-120move_to_coords(X=150, Y=-120)
指定关节旋转比如关节1旋转到60度总共有6个关节single_joint_move(1, 60)
移动至俯视姿态move_to_top_view()
拍一张俯视图top_view_shot()
开启摄像头在屏幕上实时显示摄像头拍摄的画面check_camera()
LED灯改变颜色比如llm_led('帮我把LED灯的颜色改为贝加尔湖的颜色')
将一个物体移动到另一个物体的位置上比如vlm_move('帮我把红色方块放在小猪佩奇上')
拖动示教我可以拽着机械臂运动然后机械臂模仿复现出一样的动作drag_teach()
休息等待比如等待两秒time.sleep(2)
输出json格式
你直接输出json即可{开始不要输出包含```json的开头或结尾
'function'键中输出函数名列表列表中每个元素都是字符串代表要运行的函数名称和参数每个函数既可以单独运行也可以和其他函数先后运行列表元素的先后顺序表示执行函数的先后顺序
'response'键中根据我的指令和你编排的动作以第一人称输出你回复我的话不要超过20个字可以幽默和发散用上歌词台词互联网热梗名场面比如李云龙的台词甄嬛传的台词练习时长两年半
以下是一些具体的例子
我的指令回到原点你输出{'function':['back_zero()'], 'response':'回家吧,回到最初的美好'}
我的指令先回到原点然后跳舞你输出{'function':['back_zero()', 'head_dance()'], 'response':'我的舞姿,练习时长两年半'}
我的指令先回到原点然后移动到180, -90坐标你输出{'function':['back_zero()', 'move_to_coords(X=180, Y=-90)'], 'response':'精准不,老子打的就是精锐'}
我的指令先打开吸泵再把关节2旋转到30度你输出{'function':['pump_on()', single_joint_move(2, 30)], 'response':'你之前做的指星笔就是通过关节2调俯仰角'}
我的指令移动到X为160Y为-30的地方你输出{'function':['move_to_coords(X=160, Y=-30)'], 'response':'坐标移动已完成'}
我的指令拍一张俯视图然后把LED灯的颜色改为黄金的颜色你输出{'function':['top_view_shot()', llm_led('把LED灯的颜色改为黄金的颜色')], 'response':'人工智能未来比黄金值钱,你信不信'}
我的指令帮我把绿色方块放在小猪佩奇上面你输出{'function':[vlm_move('帮我把绿色方块放在小猪佩奇上面')], 'response':'它的弟弟乔治呢?'}
我的指令帮我把红色方块放在李云龙的脸上你输出{'function':[vlm_move('帮我把红色方块放在李云龙的脸上')], 'response':'你他娘的真是个天才'}
我的指令关闭吸泵打开摄像头你输出{'function':[pump_off(), check_camera()], 'response':'你是我的眼,带我阅读浩瀚的书海'}
我的指令先归零再把LED灯的颜色改为墨绿色你输出{'function':[back_zero(), llm_led('把LED灯的颜色改为墨绿色')], 'response':'这种墨绿色,很像蜀南竹海的竹子'}
我的指令我拽着你运动然后你模仿复现出这个运动你输出{'function':['drag_teach()'], 'response':'你有本事拽一个鸡你太美'}
我的指令开启拖动示教你输出{'function':['drag_teach()'], 'response':'你要我模仿我自己?'}
我的指令先回到原点等待三秒再打开吸泵把LED灯的颜色改成中国红最后把绿色方块移动到摩托车上你输出{'function':['back_zero()', 'time.sleep(3)', 'pump_on()', llm_led('把LED灯的颜色改为中国红色', vlm_move('把绿色方块移动到摩托车上'))], 'response':'如果奇迹有颜色,那一定是中国红'}
一些李云龙相关的台词如果和李云龙相关可以在response中提及对应的台词
学习学个屁
给你半斤地瓜烧
老子打的就是精锐
二营长你的意大利炮呢
你他娘的真是个天才
咱老李也是十里八乡的俊后生
不报此仇我李云龙誓不为人
你猜旅长怎么说
逢敌必亮剑绝不含糊
老子当初怎么教他打枪现在就教他怎么打仗
你咋就不敢跟旅长干一架呢
你猪八戒戴眼镜充什么大学生啊
我李云龙八岁习武南拳北腿略知一二
也要死在冲锋的路上
一些小猪佩奇相关的台词
这是我的弟弟乔治
我现在的指令是
'''
def agent_plan(AGENT_PROMPT='先回到原点再把LED灯改为墨绿色然后把绿色方块放在篮球上'):
    '''
    Turn a natural-language instruction into an action plan via the Yi LLM.

    AGENT_PROMPT: the user's instruction in natural language.
    Returns the raw model reply (a dict-like string with 'function'/'response' keys).
    '''
    print('Agent智能体编排动作')
    full_prompt = AGENT_SYS_PROMPT + AGENT_PROMPT
    return llm_yi(full_prompt)

150
utils_asr.py 100644
View File

@ -0,0 +1,150 @@
# utils_asr.py
# 同济子豪兄 2024-5-22
# 录音+语音识别
print('导入录音+语音识别模块')
import pyaudio
import wave
import numpy as np
import os
import sys
from API_KEY import *
# 确定麦克风索引号
# import sounddevice as sd
# print(sd.query_devices())
def record(MIC_INDEX=0, DURATION=5):
    '''
    Record from the microphone into temp/speech_record.wav using arecord.

    MIC_INDEX: ALSA card index of the microphone (list devices with `arecord -l`).
    DURATION: recording length in seconds.
    '''
    # Coerce to int so arbitrary strings cannot be interpolated into the
    # shell command (the original formatted the raw values straight in).
    mic_index = int(MIC_INDEX)
    duration = int(DURATION)
    print('开始 {} 秒录音'.format(duration))
    cmd = 'sudo arecord -D "plughw:{}" -f dat -c 1 -r 16000 -d {} temp/speech_record.wav'.format(mic_index, duration)
    exit_code = os.system(cmd)
    if exit_code != 0:
        # Surface failures instead of silently producing no/stale audio file.
        print('录音命令执行失败,返回码:', exit_code)
    print('录音结束')
def record_auto(MIC_INDEX=1):
    '''
    Voice-activated recording to 'temp/speech_record.wav'.

    Starts capturing once the volume rises above a threshold and stops after
    the volume has stayed below it for a while (or after a hard frame limit).
    MIC_INDEX: PyAudio input device index.
    '''
    CHUNK = 1024            # samples per read; at 16 kHz one chunk is ~64 ms
    RATE = 16000            # sample rate (Hz)
    QUIET_DB = 2000         # amplitude threshold: above -> start, below -> candidate stop
    delay_time = 1          # seconds of quiet before recording auto-stops (approx; see *15 below)
    FORMAT = pyaudio.paInt16
    CHANNELS = 1 if sys.platform == 'darwin' else 2  # channel count by platform
    # Open the input stream.
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK,
                    input_device_index=MIC_INDEX
                    )
    frames = []             # every captured chunk (including pre-trigger audio)
    flag = False            # True once recording has been triggered
    quiet_flag = False      # True while the current volume is below threshold
    temp_time = 0           # current frame (chunk) counter
    last_ok_time = 0        # last frame at which audio was above threshold
    START_TIME = 0          # frame index where recording was triggered
    END_TIME = 0            # frame index where recording stopped
    print('可以说话啦!')
    while True:
        # Read one chunk of audio.
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)
        # Peak amplitude of this chunk, used as a crude loudness measure.
        temp_volume = np.max(np.frombuffer(data, dtype=np.short))
        if temp_volume > QUIET_DB and flag==False:
            print("音量高于阈值,开始录音")
            flag =True
            START_TIME = temp_time
            last_ok_time = temp_time
        if flag:  # once triggered, track quiet/loud transitions
            if(temp_volume < QUIET_DB and quiet_flag==False):
                print("录音中,当前音量低于阈值")
                quiet_flag = True
                last_ok_time = temp_time
            if(temp_volume > QUIET_DB):
                # print('录音中,当前音量高于阈值,正常录音')
                quiet_flag = False
                last_ok_time = temp_time
            # delay_time*15 frames ~= delay_time seconds (15 chunks/s at 64 ms/chunk).
            if(temp_time > last_ok_time + delay_time*15 and quiet_flag==True):
                print("音量低于阈值{:.2f}秒后,检测当前音量".format(delay_time))
                if(quiet_flag and temp_volume < QUIET_DB):
                    print("当前音量仍然小于阈值,录音结束")
                    END_TIME = temp_time
                    break
                else:
                    print("当前音量重新高于阈值,继续录音中")
                    quiet_flag = False
                    last_ok_time = temp_time
        # print('当前帧 {} 音量 {}'.format(temp_time+1, temp_volume))
        temp_time += 1
        if temp_time > 150:  # hard cap (~10 s): stop even if still loud
            END_TIME = temp_time
            print('超时,录音结束')
            break
    # Tear down the audio stream.
    stream.stop_stream()
    stream.close()
    p.terminate()
    # Write the captured frames as a wav file, keeping 2 chunks of pre-trigger
    # audio. NOTE(review): if the trigger fires within the first 2 chunks,
    # START_TIME-2 goes negative and the slice wraps — confirm this is acceptable.
    output_path = 'temp/speech_record.wav'
    wf = wave.open(output_path, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames[START_TIME-2:END_TIME]))
    wf.close()
    print('保存录音文件', output_path)
import appbuilder
# 配置密钥
os.environ["APPBUILDER_TOKEN"] = APPBUILDER_TOKEN
asr = appbuilder.ASR() # 语音识别组件
def speech_recognition(audio_path='temp/speech_record.wav'):
    '''
    Speech recognition via the Baidu AppBuilder-SDK ASR component.

    audio_path: path to a wav file (sent to the API as 16 kHz wav).
    Returns the first recognition candidate as a string.
    '''
    print('开始语音识别')
    # Load the raw audio frames from the wav file.
    # (The original also read channel count/sample width/framerate but never
    # used them — dropped as dead code.)
    with wave.open(audio_path, 'rb') as wav_file:
        frames = wav_file.readframes(wav_file.getnframes())
    # Send the request to the ASR API.
    content_data = {"audio_format": "wav", "raw_audio": frames, "rate": 16000}
    message = appbuilder.Message(content_data)
    speech_result = asr.run(message).content['result'][0]
    print('语音识别结果:', speech_result)
    return speech_result

25
utils_camera.py 100644
View File

@ -0,0 +1,25 @@
# utils_camera.py
# 同济子豪兄 2024-5-22
# 开启摄像头调用摄像头实时画面按q键退出
import cv2
import numpy as np
def check_camera():
    '''
    Open the default camera and show a live preview; press 'q' to quit.
    '''
    print('开启摄像头')
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        # Fail fast with a clear message instead of crashing later in imshow.
        print('无法打开摄像头')
        return
    while True:
        ret, frame = cap.read()
        if not ret:
            # A failed read returns frame=None; the original passed it
            # straight to cv2.imshow and crashed. Stop cleanly instead.
            print('读取摄像头画面失败')
            break
        # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

View File

@ -0,0 +1,186 @@
# utils_drag_teaching.py
# 同济子豪兄 2024-5-23
# 拖动示教
print('导入拖动示教模块')
import time
import os
import sys
import termios
import tty
import threading
import json
from pymycobot.mycobot import MyCobot
from pymycobot import PI_PORT, PI_BAUD
# 连接机械臂
mc = MyCobot(PI_PORT, PI_BAUD, debug=False)
class Raw(object):
    """Context manager that puts a terminal stream into cbreak input mode.

    Lets the caller read single keypresses without waiting for Enter; the
    original terminal settings are restored on exit.
    """
    def __init__(self, stream):
        self.stream = stream
        self.fd = self.stream.fileno()
    def __enter__(self):
        # Save the current terminal settings so __exit__ can restore them.
        self.original_stty = termios.tcgetattr(self.stream)
        tty.setcbreak(self.stream)
    def __exit__(self, type, value, traceback):
        # Restore the settings captured in __enter__ (applied immediately).
        termios.tcsetattr(self.stream, termios.TCSANOW, self.original_stty)
class Helper(object):
    """Console helper: echo() rewrites the current terminal line in place."""
    def __init__(self) -> None:
        # Terminal width/height. NOTE(review): may raise OSError when stdout
        # is not attached to a terminal — confirm for non-interactive use.
        self.w, self.h = os.get_terminal_size()
    def echo(self, msg):
        # Blank the current line (carriage return + spaces), then write msg
        # over it without a trailing newline.
        print("\r{}".format(" " * self.w), end="")
        print("\r{}".format(msg), end="")
class TeachingTest(Helper):
    """Keyboard-driven drag-teaching session for the robot arm.

    Records joint-encoder snapshots on a background thread while the user
    drags the arm, and can replay them once or in a loop, or save/load them
    as JSON at temp/record.txt. Driven by single keypresses via start().
    """
    def __init__(self, mycobot) -> None:
        super().__init__()
        self.mc = mycobot
        self.recording = False   # True while the background record thread samples
        self.playing = False     # True while loop playback is running
        self.record_list = []    # recorded encoder snapshots, one list per sample
        self.record_t = None     # background recording thread
        self.play_t = None       # background loop-playback thread
    def record(self):
        """Start sampling encoder values (~10 Hz) on a daemon thread."""
        self.record_list = []
        self.recording = True
        self.mc.set_fresh_mode(0)
        def _record():
            start_t = time.time()
            while self.recording:
                angles = self.mc.get_encoders()
                if angles:  # skip empty reads from the serial link
                    self.record_list.append(angles)
                    time.sleep(0.1)  # sample period -> matches playback period
                    print("\r {}".format(time.time() - start_t), end="")
        self.echo("开始录制动作")
        self.record_t = threading.Thread(target=_record, daemon=True)
        self.record_t.start()
    def stop_record(self):
        """Stop the recording thread and wait for it to finish."""
        if self.recording:
            self.recording = False
            self.record_t.join()
            self.echo("停止录制动作")
    def play(self):
        """Replay the recorded samples once at the recording rate."""
        self.echo("开始回放动作")
        for angles in self.record_list:
            # print(angles)
            self.mc.set_encoders(angles, 80)
            time.sleep(0.1)
        self.echo("回放结束\n")
    def loop_play(self):
        """Replay the recorded samples repeatedly on a daemon thread."""
        self.playing = True
        def _loop():
            len_ = len(self.record_list)
            i = 0
            while self.playing:
                idx_ = i % len_  # wrap around to loop the sequence
                i += 1
                self.mc.set_encoders(self.record_list[idx_], 80)
                time.sleep(0.1)
        self.echo("开始循环回放")
        self.play_t = threading.Thread(target=_loop, daemon=True)
        self.play_t.start()
    def stop_loop_play(self):
        """Stop loop playback and wait for the thread to finish."""
        if self.playing:
            self.playing = False
            self.play_t.join()
            self.echo("停止循环回放")
    def save_to_local(self):
        """Dump the recorded samples as JSON to temp/record.txt."""
        if not self.record_list:
            self.echo("No data should save.")
            return
        save_path = os.path.dirname(__file__) + "/temp/record.txt"
        with open(save_path, "w") as f:
            json.dump(self.record_list, f, indent=2)
            self.echo("回放动作导出至: {}".format(save_path))
    def load_from_local(self):
        """Load previously saved samples from temp/record.txt."""
        with open(os.path.dirname(__file__) + "/temp/record.txt", "r") as f:
            try:
                data = json.load(f)
                self.record_list = data
                self.echo("载入本地动作数据成功")
            except Exception:
                self.echo("Error: invalid data.")
    def print_menu(self):
        # One-key command menu shown at session start.
        print(
            """\
\r 拖动示教 同济子豪兄
\r q: 退出
\r r: 开始录制动作
\r c: 停止录制动作
\r p: 回放动作
\r P: 循环回放/停止循环回放
\r s: 将录制的动作保存到本地
\r l: 从本地读取录制好的动作
\r f: 放松机械臂
\r----------------------------------
"""
        )
    def start(self):
        """Main key loop: read one raw keypress at a time and dispatch it."""
        self.print_menu()
        while not False:
            # Raw mode so a single keypress is read without Enter.
            with Raw(sys.stdin):
                key = sys.stdin.read(1)
            if key == "q":
                break
            elif key == "r":  # start recording
                self.record()
            elif key == "c":  # stop recording
                self.stop_record()
            elif key == "p":  # play once
                self.play()
            elif key == "P":  # toggle loop playback
                if not self.playing:
                    self.loop_play()
                else:
                    self.stop_loop_play()
            elif key == "s":  # save to local file
                self.save_to_local()
            elif key == "l":  # load from local file
                self.load_from_local()
            elif key == "f":  # free move: release all servos
                self.mc.release_all_servos()
                self.echo("Released")
            else:
                print(key)
                continue
def drag_teach():
    '''
    Run an interactive drag-teaching session: home the arm, start the
    keyboard-driven record/replay menu, then home the arm again.
    '''
    def _home():
        # Return every joint to zero and give the move time to finish.
        print('机械臂归零')
        mc.send_angles([0, 0, 0, 0, 0, 0], 40)
        time.sleep(3)
    _home()
    TeachingTest(mc).start()
    _home()

41
utils_led.py 100644
View File

@ -0,0 +1,41 @@
# utils_led.py
# 同济子豪兄 2024-5-22
# 大模型控制LED灯颜色
from utils_llm import llm_qianfan, llm_yi
from utils_robot import mc
print('导入LED灯控制模块')
# 备选颜色
# 贝加尔湖、中国红、大海、绿叶、金子、蓝宝石、小猪佩奇、墨绿色、黑色
# 系统提示词
SYS_PROMPT = '我即将说的这句话中包含一个目标物体帮我把这个物体的一种可能的颜色以0-255的RGB像素值形式返回给我整理成元组格式例如(255, 30, 60),直接回复元组本身,以括号开头,不要回复任何中文内容,下面是这句话:'
def llm_led(PROMPT_LED='帮我把LED灯的颜色改为贝加尔湖的颜色'):
    '''
    Ask the LLM to pick an RGB color for the instruction and set the arm LED.

    PROMPT_LED: natural-language color request.
    Retries up to 4 times if the reply cannot be parsed as an (r, g, b) tuple.
    '''
    import ast  # local import: safe parsing of the model reply
    PROMPT = SYS_PROMPT + PROMPT_LED
    n = 1
    while n < 5:
        try:
            # Call the LLM API.
            # response = llm_qianfan(PROMPT)
            response = llm_yi(PROMPT)
            # Parse with literal_eval instead of eval: the model output is
            # untrusted text and must never be executed as code.
            rgb_tuple = ast.literal_eval(response)
            r, g, b = rgb_tuple  # also validates that exactly 3 values came back
            # Set the LED RGB color.
            mc.set_color(r, g, b)
            print('LED灯颜色修改成功', rgb_tuple)
            break
        except Exception as e:
            print('大模型返回json结构错误再尝试一次', e)
            n += 1
    else:
        # All retries exhausted — say so instead of failing silently.
        print('多次尝试后仍未能解析大模型返回的颜色')

59
utils_llm.py 100644
View File

@ -0,0 +1,59 @@
# utils_llm.py
# 同济子豪兄 2024-5-22
# 调用大语言模型API
print('导入大模型API模块')
import os
import qianfan
def llm_qianfan(PROMPT='你好,你是谁?'):
    '''
    Call the Baidu Qianfan LLM platform and return the text reply.

    PROMPT: user message to send to the model.
    '''
    # The qianfan SDK reads credentials from environment variables.
    os.environ["QIANFAN_ACCESS_KEY"] = QIANFAN_ACCESS_KEY
    os.environ["QIANFAN_SECRET_KEY"] = QIANFAN_SECRET_KEY
    # Model selection.
    MODEL = "ERNIE-Bot-4"
    # MODEL = "ERNIE Speed"
    # MODEL = "ERNIE-Lite-8K"
    # MODEL = 'ERNIE-Tiny-8K'
    chat_comp = qianfan.ChatCompletion(model=MODEL)
    # Send the single-turn chat request.
    messages = [{"role": "user", "content": PROMPT}]
    resp = chat_comp.do(
        messages=messages,
        top_p=0.8,
        temperature=0.3,
        penalty_score=1.0
    )
    return resp["result"]
import openai
from openai import OpenAI
from API_KEY import *
def llm_yi(PROMPT='你好,你是谁?'):
    '''
    Call the 01.AI (Yi) LLM API and return the stripped text reply.

    PROMPT: user message to send to the model.
    '''
    # Model selection.
    MODEL = 'yi-large'
    # MODEL = 'yi-medium'
    # MODEL = 'yi-spark'
    # The Yi platform exposes an OpenAI-compatible endpoint.
    client = OpenAI(api_key=YI_KEY, base_url="https://api.lingyiwanwu.com/v1")
    messages = [{"role": "user", "content": PROMPT}]
    completion = client.chat.completions.create(model=MODEL, messages=messages)
    return completion.choices[0].message.content.strip()

37
utils_pump.py 100644
View File

@ -0,0 +1,37 @@
# utils_pump.py
# 同济子豪兄 2024-5-22
# GPIO引脚、吸泵相关函数
print('导入吸泵控制模块')
import RPi.GPIO as GPIO
import time
# 初始化GPIO
GPIO.setwarnings(False) # 不打印 warning 信息
GPIO.setmode(GPIO.BCM)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(21, GPIO.OUT)
GPIO.output(20, 1) # 关闭吸泵电磁阀
def pump_on():
    '''
    Turn the suction pump on by driving its control pin (BCM 20) low.
    '''
    PUMP_PIN = 20
    print(' 开启吸泵')
    GPIO.output(PUMP_PIN, 0)
def pump_off():
    '''
    Turn the suction pump off and vent twice so the held object is released.
    '''
    PUMP_PIN, VALVE_PIN = 20, 21
    print(' 关闭吸泵')
    GPIO.output(PUMP_PIN, 1)  # close the pump solenoid valve
    time.sleep(0.05)
    for _ in range(2):  # vent two full cycles to make sure the object drops
        GPIO.output(VALVE_PIN, 0)  # open the vent valve
        time.sleep(0.2)
        GPIO.output(VALVE_PIN, 1)  # close the vent valve
        time.sleep(0.05)

222
utils_robot.py 100644
View File

@ -0,0 +1,222 @@
# utils_robot.py
# 同济子豪兄 2024-5-22
# 启动并连接机械臂,导入各种工具包
print('导入机械臂连接模块')
from pymycobot.mycobot import MyCobot
from pymycobot import PI_PORT, PI_BAUD
import cv2
import numpy as np
import time
from utils_pump import *
# 连接机械臂
mc = MyCobot(PI_PORT, PI_BAUD)
# 设置运动模式为插补
mc.set_fresh_mode(0)
import RPi.GPIO as GPIO
# 初始化GPIO
GPIO.setwarnings(False) # 不打印 warning 信息
GPIO.setmode(GPIO.BCM)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(21, GPIO.OUT)
GPIO.output(20, 1) # 关闭吸泵电磁阀
def back_zero():
    '''
    Return every joint of the arm to its zero (home) position.
    '''
    print('机械臂归零')
    HOME_ANGLES = [0, 0, 0, 0, 0, 0]
    mc.send_angles(HOME_ANGLES, 40)
    time.sleep(3)  # give the move time to complete
def relax_arms():
    '''
    Release all servos so the arm can be moved freely by hand.
    '''
    print('放松机械臂关节')
    mc.release_all_servos()
def head_shake():
    '''
    "Shake head": swing joint 5 left/right twice from a ready pose,
    then return the arm to zero.
    '''
    ready_pose = [0.87, -50.44, 47.28, 0.35, -0.43, -0.26]
    mc.send_angles(ready_pose, 70)
    time.sleep(1)
    for _ in range(2):
        mc.send_angle(5, 30, 80)
        time.sleep(0.5)
        mc.send_angle(5, -30, 80)
        time.sleep(0.5)
    mc.send_angles([0, 0, 0, 0, 0, 0], 40)
    time.sleep(2)
def head_dance():
    '''
    "Dance": run through a fixed sequence of poses, then return to zero.
    '''
    mc.send_angles([0.87, -50.44, 47.28, 0.35, -0.43, -0.26], 70)
    time.sleep(1)
    # (pose, pause-after) pairs, played once in order.
    moves = [
        ([-0.17, -94.3, 118.91, -39.9, 59.32, -0.52], 1.2),
        ([67.85, -3.42, -116.98, 106.52, 23.11, -0.52], 1.7),
        ([-38.14, -115.04, 116.63, 69.69, 3.25, -11.6], 1.7),
        ([2.72, -26.19, 140.27, -110.74, -6.15, -11.25], 1),
    ]
    for pose, pause in moves:
        mc.send_angles(pose, 80)
        time.sleep(pause)
    mc.send_angles([0, 0, 0, 0, 0, 0], 80)
def head_nod():
    '''
    "Nod": pitch joint 4 down/up twice around a fixed ready pose.
    '''
    ready_pose = [0.87, -50.44, 47.28, 0.35, -0.43, -0.26]
    mc.send_angles(ready_pose, 70)
    for _ in range(2):
        # One nod: down, up, back to the middle.
        for angle, pause in ((13, 0.5), (-20, 1), (13, 0.5)):
            mc.send_angle(4, angle, 70)
            time.sleep(pause)
    mc.send_angles(ready_pose, 70)
def move_to_coords(X=150, Y=-130, HEIGHT_SAFE=230):
    '''
    Move the end effector to (X, Y) at HEIGHT_SAFE with the tool facing down.
    '''
    print('移动至指定坐标X {} Y {}'.format(X, Y))
    target = [X, Y, HEIGHT_SAFE, 0, 180, 90]
    mc.send_coords(target, 20, 0)
    time.sleep(4)  # wait for the move to finish
def single_joint_move(joint_index, angle):
    '''
    Rotate one joint (1-6) to the given angle and wait for the move.
    '''
    print('关节 {} 旋转至 {}'.format(joint_index, angle))
    speed = 40
    mc.send_angle(joint_index, angle, speed)
    time.sleep(2)
def move_to_top_view():
    '''
    Move the arm into the fixed top-down (bird's-eye) camera pose.
    '''
    print('移动至俯视姿态')
    TOP_VIEW_ANGLES = [-62.13, 8.96, -87.71, -14.41, 2.54, -16.34]
    mc.send_angles(TOP_VIEW_ANGLES, 10)
    time.sleep(3)  # wait for the slow move to settle
def top_view_shot(check=False):
    '''
    Move to the top-view pose, grab one camera frame and save it.

    The frame is written to temp/vl_now.jpg and shown in an OpenCV window.
    check: if True, wait for on-screen confirmation — press 'c' to continue,
           'q' to abort (raises NameError).
    '''
    print(' 移动至俯视姿态')
    move_to_top_view()
    # Open the default system camera (index 0).
    cap = cv2.VideoCapture(0)
    time.sleep(0.3)  # give the sensor a moment to settle/auto-expose
    success, img_bgr = cap.read()
    if not success:
        # Fail loudly instead of passing None on to imwrite/imshow.
        cap.release()
        raise RuntimeError('摄像头拍摄失败')
    # Save the frame.
    print(' 保存至temp/vl_now.jpg')
    cv2.imwrite('temp/vl_now.jpg', img_bgr)
    # Show the captured frame on screen.
    cv2.destroyAllWindows()  # close any previous OpenCV windows first
    cv2.imshow('zihao_vlm', img_bgr)
    if check:
        print('请确认拍照成功按c键继续按q键退出')
        while True:
            key = cv2.waitKey(10) & 0xFF
            if key == ord('c'):  # 'c' continues
                break
            if key == ord('q'):  # 'q' aborts
                cv2.destroyAllWindows()
                raise NameError('按q退出')
    else:
        # Pump the GUI event loop once so the window actually paints.
        # (The original compared waitKey to None, which is always False.)
        cv2.waitKey(10)
    # Release the camera; the window stays open for the caller.
    cap.release()
def eye2hand(X_im=160, Y_im=120):
    '''
    Convert a pixel coordinate in the top-view image to an arm (X, Y) coordinate.

    Linear interpolation between two manually measured calibration points
    (bottom-left and top-right of the workspace); results are truncated to int.
    '''
    # Hand-measured calibration points: (pixel coordinate, arm coordinate).
    im_bottom_left, mc_bottom_left = [130, 290], [-21.8, -197.4]  # fill in by hand!
    im_top_right, mc_top_right = [640, 0], [215, -59.1]           # fill in by hand!
    # np.interp requires the sample x-coordinates in increasing order,
    # hence the ordering of the pairs below.
    X_mc = int(np.interp(X_im,
                         [im_bottom_left[0], im_top_right[0]],
                         [mc_bottom_left[0], mc_top_right[0]]))
    Y_mc = int(np.interp(Y_im,
                         [im_top_right[1], im_bottom_left[1]],
                         [mc_top_right[1], mc_bottom_left[1]]))
    return X_mc, Y_mc
# Pick up an object with the suction pump and move it.
def pump_move(mc, XY_START=[230,-50], HEIGHT_START=90, XY_END=[100,220], HEIGHT_END=100, HEIGHT_SAFE=220):
    '''
    Use the suction pump to pick an object at the start point and place it
    at the end point.

    mc: robot arm instance.
    XY_START: arm (X, Y) coordinate of the pick-up point.
    HEIGHT_START: pick-up height (90 for blocks, 70 for a flat medicine box).
    XY_END: arm (X, Y) coordinate of the drop-off point.
    HEIGHT_END: drop-off height.
    HEIGHT_SAFE: travel height used while carrying the object.
    '''
    # Initialize GPIO for the pump/vent pins.
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(20, GPIO.OUT)
    GPIO.setup(21, GPIO.OUT)
    # Interpolated (smooth) motion mode.
    mc.set_fresh_mode(0)
    # # Home the arm
    # print(' 机械臂归零')
    # mc.send_angles([0, 0, 0, 0, 0, 0], 40)
    # time.sleep(4)
    # Move the pump above the object at the safe height.
    print(' 吸泵移动至物体上方')
    mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 20, 0)
    time.sleep(4)
    # Turn the pump on before descending so suction grabs on contact.
    pump_on()
    # Descend onto the object.
    print(' 吸泵向下吸取物体')
    mc.send_coords([XY_START[0], XY_START[1], HEIGHT_START, 0, 180, 90], 15, 0)
    time.sleep(4)
    # Lift the object back to the safe height.
    print(' 升起物体')
    mc.send_coords([XY_START[0], XY_START[1], HEIGHT_SAFE, 0, 180, 90], 15, 0)
    time.sleep(4)
    # Carry it above the target.
    print(' 搬运物体至目标上方')
    mc.send_coords([XY_END[0], XY_END[1], HEIGHT_SAFE, 0, 180, 90], 15, 0)
    time.sleep(4)
    # Lower it to the drop-off height.
    print(' 向下放下物体')
    mc.send_coords([XY_END[0], XY_END[1], HEIGHT_END, 0, 180, 90], 20, 0)
    time.sleep(3)
    # Release the object (pump off + vent).
    pump_off()
    # Home the arm.
    print(' 机械臂归零')
    mc.send_angles([0, 0, 0, 0, 0, 0], 40)
    time.sleep(3)

60
utils_tts.py 100644
View File

@ -0,0 +1,60 @@
# utils_tts.py
# 同济子豪兄 2024-5-23
# 语音合成
print('导入语音合成模块')
import os
import appbuilder
from API_KEY import *
import pyaudio
import wave
tts_ab = appbuilder.TTS()
def tts(TEXT='我是同济子豪兄的麒麟臂', tts_wav_path='temp/tts.wav'):
    '''
    Text-to-speech: synthesize TEXT and write the audio as a wav file.

    TEXT: text to speak.
    tts_wav_path: output wav file path.
    '''
    message = appbuilder.Message(content={"text": TEXT})
    audio = tts_ab.run(message, model="paddlespeech-tts", audio_type="wav")
    # audio = tts_ab.run(message, audio_type="wav")
    with open(tts_wav_path, "wb") as wav_file:
        wav_file.write(audio.content["audio_binary"])
def play_wav(wav_file='asset/welcome.wav'):
    '''
    Play a wav file through the system speaker using aplay.

    wav_file: path of the wav file to play.
    '''
    import shlex  # local import: only needed to quote the path for the shell
    # Quote the filename so spaces or shell metacharacters in the path
    # cannot break (or inject into) the shell command.
    prompt = 'aplay -t wav {} -q'.format(shlex.quote(wav_file))
    os.system(prompt)
# def play_wav(wav_file='temp/tts.wav'):
# '''
# 播放wav文件
# '''
# wf = wave.open(wav_file, 'rb')
# # 实例化PyAudio
# p = pyaudio.PyAudio()
# # 打开流
# stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
# channels=wf.getnchannels(),
# rate=wf.getframerate(),
# output=True)
# chunk_size = 1024
# # 读取数据
# data = wf.readframes(chunk_size)
# # 播放音频
# while data != b'':
# stream.write(data)
# data = wf.readframes(chunk_size)
# # 停止流关闭流和PyAudio
# stream.stop_stream()
# stream.close()
# p.terminate()

158
utils_vlm.py 100644
View File

@ -0,0 +1,158 @@
# utils_vlm.py
# 同济子豪兄 2024-5-22
# 多模态大模型、可视化
print('导入视觉大模型模块')
import time
import cv2
import numpy as np
from PIL import Image
from PIL import ImageFont, ImageDraw
# 导入中文字体,指定字号
font = ImageFont.truetype('asset/SimHei.ttf', 26)
from API_KEY import *
# 系统提示词
SYSTEM_PROMPT = '''
我即将说一句给机械臂的指令你帮我从这句话中提取出起始物体和终止物体并从这张图中分别找到这两个物体左上角和右下角的像素坐标输出json数据结构
例如如果我的指令是请帮我把红色方块放在房子简笔画上
你输出这样的格式
{
"start":"红色方块",
"start_xyxy":[[102,505],[324,860]],
"end":"房子简笔画",
"end_xyxy":[[300,150],[476,310]]
}
只回复json本身即可不要回复其它内容
我现在的指令是
'''
# Yi-Vision调用函数
import openai
from openai import OpenAI
import base64
def yi_vision_api(PROMPT='帮我把红色方块放在钢笔上', img_path='temp/vl_now.jpg'):
    '''
    Call the 01.AI yi-vision multimodal model: locate the start/end objects
    named in PROMPT within the image.

    PROMPT: natural-language move instruction.
    img_path: path of the jpeg image to analyze.
    Returns the parsed dict with keys 'start', 'start_xyxy', 'end', 'end_xyxy'.
    '''
    import ast  # local import: safe parsing of the model reply
    client = OpenAI(
        api_key=YI_KEY,
        base_url="https://api.lingyiwanwu.com/v1"
    )
    # Encode the image as a base64 data URL.
    with open(img_path, 'rb') as image_file:
        image = 'data:image/jpeg;base64,' + base64.b64encode(image_file.read()).decode('utf-8')
    # Send the request to the model.
    completion = client.chat.completions.create(
        model="yi-vision",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": SYSTEM_PROMPT + PROMPT
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image
                        }
                    }
                ]
            },
        ]
    )
    # Parse the reply as a literal. literal_eval instead of eval: the model
    # output is untrusted text and must never be executed as code.
    result = ast.literal_eval(completion.choices[0].message.content.strip())
    print(' 大模型调用成功!')
    return result
def post_processing_viz(result, img_path, check=False):
    '''
    Post-process and visualize the vision model output.

    result: dict from yi_vision_api — 'start'/'end' object names plus
            'start_xyxy'/'end_xyxy' boxes ([[x_min,y_min],[x_max,y_max]])
            scaled to the 0-FACTOR range.
    img_path: path of the image the boxes refer to.
    check: if True, require on-screen confirmation ('c' continue, 'q' abort
           via NameError).
    Returns (start_x, start_y, end_x, end_y) box centers in pixel coordinates.
    '''
    import os  # local import: only needed to ensure the output dir exists
    # --- post-processing: rescale model coordinates to pixels ---
    img_bgr = cv2.imread(img_path)
    img_h = img_bgr.shape[0]
    img_w = img_bgr.shape[1]
    # The model reports coordinates scaled to 0..FACTOR.
    FACTOR = 999
    START_NAME = result['start']
    END_NAME = result['end']
    # Start box: top-left, bottom-right and center, in pixels.
    START_X_MIN = int(result['start_xyxy'][0][0] * img_w / FACTOR)
    START_Y_MIN = int(result['start_xyxy'][0][1] * img_h / FACTOR)
    START_X_MAX = int(result['start_xyxy'][1][0] * img_w / FACTOR)
    START_Y_MAX = int(result['start_xyxy'][1][1] * img_h / FACTOR)
    START_X_CENTER = int((START_X_MIN + START_X_MAX) / 2)
    START_Y_CENTER = int((START_Y_MIN + START_Y_MAX) / 2)
    # End box: top-left, bottom-right and center, in pixels.
    END_X_MIN = int(result['end_xyxy'][0][0] * img_w / FACTOR)
    END_Y_MIN = int(result['end_xyxy'][0][1] * img_h / FACTOR)
    END_X_MAX = int(result['end_xyxy'][1][0] * img_w / FACTOR)
    END_Y_MAX = int(result['end_xyxy'][1][1] * img_h / FACTOR)
    END_X_CENTER = int((END_X_MIN + END_X_MAX) / 2)
    END_Y_CENTER = int((END_Y_MIN + END_Y_MAX) / 2)
    # --- visualization ---
    # Start box + center (red in BGR), end box + center (blue in BGR).
    img_bgr = cv2.rectangle(img_bgr, (START_X_MIN, START_Y_MIN), (START_X_MAX, START_Y_MAX), [0, 0, 255], thickness=3)
    img_bgr = cv2.circle(img_bgr, [START_X_CENTER, START_Y_CENTER], 6, [0, 0, 255], thickness=-1)
    img_bgr = cv2.rectangle(img_bgr, (END_X_MIN, END_Y_MIN), (END_X_MAX, END_Y_MAX), [255, 0, 0], thickness=3)
    img_bgr = cv2.circle(img_bgr, [END_X_CENTER, END_Y_CENTER], 6, [255, 0, 0], thickness=-1)
    # Draw the Chinese object names via PIL (OpenCV cannot render CJK text).
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # BGR -> RGB
    img_pil = Image.fromarray(img_rgb)                  # array -> PIL image
    draw = ImageDraw.Draw(img_pil)
    draw.text((START_X_MIN, START_Y_MIN-32), START_NAME, font=font, fill=(255, 0, 0, 1))
    draw.text((END_X_MIN, END_Y_MIN-32), END_NAME, font=font, fill=(0, 0, 255, 1))
    img_bgr = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)  # RGB -> BGR
    # Save the visualization plus a timestamped archive copy.
    cv2.imwrite('temp/vl_now_viz.jpg', img_bgr)
    # cv2.imwrite fails silently when the directory is missing — create it.
    os.makedirs('visualizations', exist_ok=True)
    formatted_time = time.strftime("%Y%m%d%H%M", time.localtime())
    cv2.imwrite('visualizations/{}.jpg'.format(formatted_time), img_bgr)
    # Show the visualization on screen.
    cv2.imshow('zihao_vlm', img_bgr)
    if check:
        print(' 请确认可视化成功按c键继续按q键退出')
        while True:
            key = cv2.waitKey(10) & 0xFF
            if key == ord('c'):  # 'c' continues
                break
            if key == ord('q'):  # 'q' aborts
                cv2.destroyAllWindows()
                raise NameError('按q退出')
    else:
        # Pump the GUI event loop once so the window actually paints.
        # (The original compared waitKey to None, which is always False.)
        cv2.waitKey(1)
    return START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER

84
utils_vlm_move.py 100644
View File

@ -0,0 +1,84 @@
# utils_vlm_move.py
# 同济子豪兄 2024-5-22
# 输入指令,多模态大模型识别图像,吸泵吸取并移动物体
# print('神行太保:能看懂“图像”、听懂“人话”的机械臂')
from utils_robot import *
from utils_asr import *
from utils_vlm import *
import time
def vlm_move(PROMPT='帮我把绿色方块放在小猪佩奇上', input_way='keyboard'):
    '''
    Multimodal pick-and-place: let the vision LLM locate the objects named in
    PROMPT in a top-view photo, then pick and move with the suction pump.

    PROMPT: natural-language move instruction.
    input_way: 'speech' or 'keyboard' — currently unused (the instruction is
               always taken from PROMPT); kept for interface compatibility.
    '''
    print('多模态大模型识别图像,吸泵吸取并移动物体')
    # Home the arm before doing anything else.
    print('机械臂归零')
    mc.send_angles([0, 0, 0, 0, 0, 0], 50)
    time.sleep(3)
    ## Step 1: hand-eye calibration must already be done (see eye2hand).
    print('第一步:完成手眼标定')
    ## Step 2: the instruction.
    print('第二步,给出的指令是:', PROMPT)
    ## Step 3: take the top-view photo.
    print('第三步:拍摄俯视图')
    top_view_shot(check=False)
    ## Step 4: send the photo to the multimodal model, retrying on bad replies.
    print('第四步:将图片输入给多模态视觉大模型')
    img_path = 'temp/vl_now.jpg'
    result = None
    n = 1
    while n < 5:
        try:
            print(' 尝试第 {} 次访问多模态大模型'.format(n))
            result = yi_vision_api(PROMPT, img_path='temp/vl_now.jpg')
            print(' 多模态大模型调用成功!')
            print(result)
            break
        except Exception as e:
            print(' 多模态大模型返回数据结构错误,再尝试一次', e)
            n += 1
    if result is None:
        # All retries failed. The original fell through with `result`
        # unbound and crashed below with a confusing NameError.
        raise RuntimeError('多模态大模型调用失败')
    ## Step 5: post-process and visualize the model output.
    print('第五步:视觉大模型输出结果后处理和可视化')
    START_X_CENTER, START_Y_CENTER, END_X_CENTER, END_Y_CENTER = post_processing_viz(result, img_path, check=True)
    ## Step 6: convert pixel coordinates to arm coordinates.
    print('第六步:手眼标定,将像素坐标转换为机械臂坐标')
    START_X_MC, START_Y_MC = eye2hand(START_X_CENTER, START_Y_CENTER)  # pick point
    END_X_MC, END_Y_MC = eye2hand(END_X_CENTER, END_Y_CENTER)          # place point
    ## Step 7: pick and move the object with the suction pump.
    print('第七步:吸泵吸取移动物体')
    pump_move(mc=mc, XY_START=[START_X_MC, START_Y_MC], XY_END=[END_X_MC, END_Y_MC])
    ## Step 8: clean up.
    print('第八步:任务完成')
    GPIO.cleanup()           # release the GPIO pin channels
    cv2.destroyAllWindows()  # close all OpenCV windows
    # exit()

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB