npx skills add https://github.com/zrong/skills --skill os-use

一个全面的跨平台操作系统自动化工具包,支持屏幕截图捕获、视觉识别、鼠标/键盘控制和窗口管理。支持 macOS 12+ 和 Windows 10+。
| 功能 | macOS 实现 | Windows 实现 |
|---|---|---|
| 屏幕截图 | pyautogui + PIL | pyautogui + PIL |
| 视觉识别 | opencv-python + pyautogui | opencv-python + pyautogui |
| 鼠标/键盘 | pyautogui | pyautogui |
| 窗口管理 | AppleScript (原生) | pywinauto / pygetwindow |
| 应用程序控制 | AppleScript / subprocess | subprocess / pywinauto |
| 浏览器自动化 | Chrome DevTools MCP | Chrome DevTools MCP |
通用 (macOS & Windows):
实现: pyautogui.screenshot() + PIL.Image
通用 (macOS & Windows):
可选 OCR:
(pytesseract + Tesseract OCR 引擎)

实现: opencv-python + pyautogui.locateOnScreen()
通用 (macOS & Windows):
实现: pyautogui
macOS 实现:
实现: 通过 subprocess 使用 AppleScript
Windows 实现:
实现: pywinauto 或 pygetwindow
通用 (macOS & Windows):
实现: Chrome DevTools MCP (独立工具)
剪贴板操作:
实现: pyperclip + pyautogui
# 创建虚拟环境
python3 -m venv ~/.nanobot/workspace/macos-automation/.venv
# 激活
source ~/.nanobot/workspace/macos-automation/.venv/bin/activate
# 安装依赖
pip install pyautogui opencv-python-headless numpy Pillow pyperclip
# macOS 特定
# (AppleScript 是内置的,无需安装)
# Windows 特定
pip install pywinauto pygetwindow
| 库 | 版本 | 用途 |
|---|---|---|
pyautogui | 0.9.54+ | 截图、鼠标/键盘控制 |
opencv-python-headless | 4.11.0.84+ | 图像识别、计算机视觉 |
numpy | 2.4.2+ | OpenCV 的数值运算 |
Pillow | 12.1.1+ | 图像处理 |
pyperclip | Latest | 剪贴板操作 |
pywinauto | Latest | Windows 窗口管理 |
pygetwindow | Latest | 跨平台窗口控制 |
所需权限:
AppleScript 注意事项:
坐标系:
管理员权限:
高 DPI 显示屏:
pyautogui.size() 获取实际屏幕尺寸

窗口句柄 (HWND):
pywinauto 提供高级和低级访问

import pyautogui
import time
# 模式 1: 带退避的重试
# Pattern 1: retry with exponential backoff
def retry_with_backoff(func, max_retries=3, base_delay=1):
    """Call ``func`` and retry on any exception with exponential backoff.

    Waits ``base_delay * 2**attempt`` seconds between attempts and re-raises
    the last exception once ``max_retries`` attempts have been exhausted.
    """
    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return func()
        except Exception as exc:
            if attempt == last_attempt:
                # Out of retries: propagate the final failure.
                raise
            wait = base_delay * (2 ** attempt)
            print(f"Retry {attempt+1}/{max_retries} after {wait}s: {exc}")
            time.sleep(wait)
# 模式 2: 带后备方案的安全操作
def safe_screenshot(output_path):
    """Capture the full screen to ``output_path``.

    Returns the path on success, or None when the capture fails
    (best-effort: the error is printed, not raised).
    """
    try:
        pyautogui.screenshot().save(output_path)
    except Exception as exc:
        print(f"Screenshot failed: {exc}")
        return None
    return output_path
# 模式 3: 坐标边界检查
def safe_click(x, y, max_x=None, max_y=None):
    """Click at (x, y), clamping the coordinates into the screen bounds."""
    if max_x is None or max_y is None:
        # Fall back to the actual screen resolution.
        max_x, max_y = pyautogui.size()
    clamped_x = min(max(x, 0), max_x - 1)
    clamped_y = min(max(y, 0), max_y - 1)
    pyautogui.click(clamped_x, clamped_y)
"""
自动化 UI 测试示例
测试一个假设的登录页面
"""
import pyautogui
import time
def test_login_flow():
    """Scripted UI test of a hypothetical login page.

    Returns:
        True if the success message is found after submitting,
        False otherwise (button missing or no success indicator).
    """
    # 1. Record the initial state.
    initial_screenshot = pyautogui.screenshot()
    initial_screenshot.save("test_01_initial.png")

    # 2. Find and click the login button.
    button_location = pyautogui.locateOnScreen(
        "login_button.png",
        confidence=0.9
    )
    if not button_location:
        # Fix: the original fell through and typed the credentials into
        # whatever window happened to have focus. Abort instead.
        print("❌ 测试失败:未找到登录按钮")
        return False
    center = pyautogui.center(button_location)
    pyautogui.click(center.x, center.y)
    time.sleep(1)

    # 3. Enter the username.
    pyautogui.typewrite("testuser@example.com", interval=0.01)
    pyautogui.press('tab')

    # 4. Enter the password.
    pyautogui.typewrite("TestPassword123", interval=0.01)

    # 5. Submit.
    pyautogui.press('return')
    time.sleep(2)

    # 6. Capture the result for the test record.
    result_screenshot = pyautogui.screenshot()
    result_screenshot.save("test_02_result.png")

    # Check whether the success indicator appeared.
    success_indicator = pyautogui.locateOnScreen(
        "success_message.png",
        confidence=0.8
    )
    if success_indicator:
        print("✅ 测试通过:登录成功")
        return True
    else:
        print("❌ 测试失败:未找到成功提示")
        return False


# Run the test
if __name__ == "__main__":
    test_login_flow()
"""
数据录入自动化示例
将 Excel 数据自动填入网页表单
"""
import sys
import time

import pandas as pd
import pyautogui
def automate_data_entry(excel_file, form_template):
    """Read rows from an Excel file and type them into an on-screen form.

    Args:
        excel_file: path of the Excel workbook to read.
        form_template: mapping of form field name -> Excel column name.
            Each field is located via a screenshot named
            ``form_field_<field>.png`` (prepared in advance).
    """
    # Fix: 'ctrl+a' does not select-all on macOS, where the modifier is
    # Command — and this skill advertises both macOS and Windows support.
    select_all = ('command', 'a') if sys.platform == 'darwin' else ('ctrl', 'a')

    # 1. Load the spreadsheet.
    df = pd.read_excel(excel_file)
    print(f"读取到 {len(df)} 条记录")

    # 2. Process each record in turn.
    for index, row in df.iterrows():
        print(f"\n正在处理第 {index + 1} 条记录...")

        # 3. Fill in every mapped field.
        for field_name, column_name in form_template.items():
            value = row.get(column_name, '')
            # Locate the field on screen.
            field_location = pyautogui.locateOnScreen(
                f"form_field_{field_name}.png",
                confidence=0.8
            )
            if field_location:
                # Focus the field.
                center = pyautogui.center(field_location)
                pyautogui.click(center.x, center.y)
                time.sleep(0.2)
                # Replace any existing content with the new value.
                pyautogui.hotkey(*select_all)
                pyautogui.typewrite(str(value), interval=0.01)
                time.sleep(0.2)
            else:
                print(f" ⚠️ 未找到字段: {field_name}")

        # 4. Submit the form.
        submit_btn = pyautogui.locateOnScreen(
            "submit_button.png",
            confidence=0.8
        )
        if submit_btn:
            center = pyautogui.center(submit_btn)
            pyautogui.click(center.x, center.y)
            print(" ✅ 已提交")
            time.sleep(2)  # wait for the submission to complete
        else:
            print(" ⚠️ 未找到提交按钮")

        # 5. Get ready for the next record
        # (may require clicking "add new record" or returning to the list).
        time.sleep(1)

    print("\n🎉 所有记录处理完成!")


# Usage example
if __name__ == "__main__":
    # Form template: field name -> Excel column name
    form_template = {
        "name": "姓名",
        "email": "邮箱",
        "phone": "电话",
        "address": "地址"
    }
    automate_data_entry("data.xlsx", form_template)
"""
屏幕监控与告警示例
监控特定区域变化,发现变化时发送通知
"""
import pyautogui
import cv2
import numpy as np
import time
from datetime import datetime
def monitor_screen_region(region, template_image=None, check_interval=5, callback=None):
    """Poll a screen region and report template sightings or visual changes.

    Blocks in a polling loop until KeyboardInterrupt or ``session.stop()``
    is called from the callback.

    Args:
        region: (left, top, width, height) region to monitor.
        template_image: optional path of a template image to search for;
            when given, change detection is skipped.
        check_interval: seconds between checks.
        callback: invoked as ``callback(event_type, data)`` on each event.

    Returns:
        The monitor session object (call ``stop()`` to end the loop).
    """
    class MonitorSession:
        def __init__(self):
            self.running = True
            self.baseline = None  # previous frame used for change detection

        def stop(self):
            self.running = False

    session = MonitorSession()
    print(f"🔍 开始监控区域: {region}")
    print(f"⏱️ 检查间隔: {check_interval}秒")
    print("按 Ctrl+C 停止监控\n")
    try:
        while session.running:
            # Capture the current region.
            current = pyautogui.screenshot(region=region)
            current_array = np.array(current)
            if template_image:
                # Mode 1: look for the template image.
                # NOTE(review): locateOnScreen searches the whole screen,
                # not only `region` — confirm this is intended.
                template_location = pyautogui.locateOnScreen(
                    template_image,
                    confidence=0.8
                )
                if template_location:
                    print(f"✅ [{datetime.now()}] 找到模板图像: {template_location}")
                    if callback:
                        callback('template_found', {
                            'location': template_location,
                            'screenshot': current
                        })
            else:
                # Mode 2: detect changes against the baseline frame.
                if session.baseline is None:
                    session.baseline = current_array
                    print(f"📸 [{datetime.now()}] 已建立基准图像")
                else:
                    # Mean per-pixel absolute difference as a change score.
                    diff = cv2.absdiff(session.baseline, current_array)
                    diff_gray = cv2.cvtColor(diff, cv2.COLOR_RGB2GRAY)
                    diff_score = np.mean(diff_gray)
                    if diff_score > 10:  # tunable threshold
                        print(f"⚠️ [{datetime.now()}] 检测到变化! 差异分数: {diff_score:.2f}")
                        if callback:
                            callback('change_detected', {
                                'diff_score': diff_score,
                                'screenshot': current,
                                'baseline': session.baseline
                            })
                    # Refresh the baseline every cycle.
                    session.baseline = current_array
            time.sleep(check_interval)
    except KeyboardInterrupt:
        print("\n🛑 监控已停止")
    return session
# 使用示例
def alert_callback(event_type, data):
    """Example alert callback for monitor_screen_region events."""
    if event_type == 'template_found':
        print(f"🎯 模板出现在: {data['location']}")
        # Hook point: send a notification, an email, trigger an action, etc.
        return
    if event_type == 'change_detected':
        print(f"📊 变化强度: {data['diff_score']}")
        # Persist the changed frame for later inspection.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        data['screenshot'].save(f"change_{stamp}.png")
if __name__ == "__main__":
    # Example 1: watch the whole screen for changes.
    print("=== 监控屏幕变化 ===")
    monitor = monitor_screen_region(
        region=(0, 0, 1920, 1080),  # full screen
        check_interval=5,  # check every 5 seconds
        callback=alert_callback
    )
    # Stop after 10 minutes (or simply let it run indefinitely):
    # time.sleep(600)
    # monitor.stop()

    # Example 2: look for a specific image instead of changes.
    # monitor = monitor_screen_region(
    #     region=(0, 0, 1920, 1080),
    #     template_image="target_button.png",  # image to search for
    #     check_interval=2,
    #     callback=alert_callback
    # )
import pyautogui
def get_all_screen_sizes():
    """Report the primary screen size; on Windows, also list every monitor.

    macOS only exposes the primary display via pyautogui; on Windows,
    win32api (pywin32) can enumerate all attached monitors.
    """
    primary = pyautogui.size()
    print(f"主屏幕尺寸: {primary}")
    # Windows-only detail (requires pywin32); silently skipped elsewhere.
    try:
        import win32api
    except ImportError:
        return primary
    for index, monitor in enumerate(win32api.EnumDisplayMonitors()):
        print(f"显示器 {index+1}: {monitor[2]}")
    return primary
def screenshot_specific_monitor(monitor_num=0):
    """Capture a specific monitor (experimental; not implemented).

    pyautogui currently targets the primary display only; real
    multi-monitor capture needs platform-specific code.
    """
    # Intentionally a stub: returns None for every monitor_num.
    pass
import cv2
import numpy as np
import pyautogui
import time
from functools import lru_cache
class ScreenCache:
    """Cache full-screen screenshots briefly to cut repeated capture cost."""

    def __init__(self, cache_duration=0.5):
        # Seconds a cached full-screen capture stays valid.
        self.cache_duration = cache_duration
        self.last_capture = None
        self.last_capture_time = 0

    def get_screenshot(self, region=None):
        """Return a screenshot, serving the cached copy while it is fresh.

        Region captures always bypass the cache and are never stored.
        """
        now = time.time()
        cache_is_fresh = (
            region is None
            and self.last_capture is not None
            and now - self.last_capture_time < self.cache_duration
        )
        if cache_is_fresh:
            return self.last_capture
        shot = pyautogui.screenshot(region=region)
        if region is None:
            # Only full-screen captures are cached.
            self.last_capture = shot
            self.last_capture_time = now
        return shot

    def clear_cache(self):
        """Drop the cached capture."""
        self.last_capture = None
        self.last_capture_time = 0
class FastImageFinder:
    """Fast template finder using a multi-scale (pyramid) search."""

    # Default scale pyramid; a tuple avoids the shared-mutable-default pitfall.
    _DEFAULT_SCALES = (0.8, 0.9, 1.0, 1.1, 1.2)

    def __init__(self, scales=None):
        # Fix: the original used a mutable list as a default argument,
        # so every default-constructed instance shared one list object.
        # Copy so external mutation of the caller's list can't affect us.
        self.scales = list(scales) if scales is not None else list(self._DEFAULT_SCALES)

    def find_multi_scale(self, template_path, screenshot=None, confidence=0.8):
        """Search the screen(shot) for the template at several scales.

        Args:
            template_path: path of the template image to look for.
            screenshot: optional PIL image; a fresh capture is taken if None.
            confidence: minimum normalized match score to accept.

        Returns:
            (center_x, center_y, scale) for the first scale that matches,
            or None when the template file is unreadable or nothing matches.
        """
        if screenshot is None:
            screenshot = pyautogui.screenshot()
        template = cv2.imread(template_path)
        if template is None:
            return None
        # PIL gives RGB; OpenCV expects BGR.
        screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2BGR)
        for scale in self.scales:
            # Resize the template for this pyramid level.
            scaled_template = cv2.resize(
                template,
                None,
                fx=scale,
                fy=scale,
                interpolation=cv2.INTER_AREA
            )
            # Normalized cross-correlation template matching.
            result = cv2.matchTemplate(
                screenshot_cv,
                scaled_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val >= confidence:
                h, w = scaled_template.shape[:2]
                center_x = max_loc[0] + w // 2
                center_y = max_loc[1] + h // 2
                return (center_x, center_y, scale)
        return None
# Usage example
cache = ScreenCache()
finder = FastImageFinder()

# Fast screenshot (served from the cache when fresh)
screenshot = cache.get_screenshot()

# Multi-scale template search
result = finder.find_multi_scale("button.png", screenshot)
if result:
    x, y, scale = result
    print(f"找到图像: ({x}, {y}), 缩放: {scale}")
"""
安全最佳实践
"""
import pyautogui
import hashlib
import time
class SecureAutomation:
    """Automation wrapper that validates inputs and keeps an audit log."""

    def __init__(self):
        self.action_log = []          # audit trail of every operation
        self.max_retries = 3
        self.rate_limit_delay = 0.1   # pause between operations (seconds)

    def log_action(self, action, details):
        """Append a timestamped entry (with a short id) to the audit log."""
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        log_entry = {
            'timestamp': timestamp,
            'action': action,
            'details': details,
            # md5 here is only a short non-cryptographic log id, not security
            'hash': hashlib.md5(f"{timestamp}{action}{details}".encode()).hexdigest()[:8]
        }
        self.action_log.append(log_entry)

    def safe_click(self, x, y, description=""):
        """Click (x, y) after bounds-checking; return True on success."""
        try:
            # Reject coordinates outside the screen.
            screen_width, screen_height = pyautogui.size()
            if not (0 <= x < screen_width and 0 <= y < screen_height):
                raise ValueError(f"坐标 ({x}, {y}) 超出屏幕范围")
            # Move, throttle, then click.
            pyautogui.moveTo(x, y, duration=0.2)
            time.sleep(self.rate_limit_delay)
            pyautogui.click()
            self.log_action('click', f"({x}, {y}) - {description}")
            return True
        except Exception as e:
            self.log_action('click_failed', f"({x}, {y}) - Error: {str(e)}")
            return False

    def safe_typewrite(self, text, interval=0.01):
        """Type text without logging its content (safe for secrets)."""
        try:
            pyautogui.typewrite(text, interval=interval)
            self.log_action('typewrite', f"输入 {len(text)} 个字符 [内容已隐藏]")
            return True
        except Exception as e:
            self.log_action('typewrite_failed', f"Error: {str(e)}")
            return False

    def get_action_report(self):
        """Build a human-readable summary plus the full audit log."""
        total = len(self.action_log)
        successful = sum(1 for log in self.action_log if 'failed' not in log['action'])
        failed = total - successful
        # Fix: the original divided by zero when no actions were logged yet.
        success_rate = (successful / total * 100) if total else 0.0
        report = f"""
=== 自动化操作报告 ===
总操作数: {total}
成功: {successful}
失败: {failed}
成功率: {success_rate:.1f}%

详细日志:
"""
        for log in self.action_log:
            report += f"[{log['timestamp']}] [{log['hash']}] {log['action']}: {log['details']}\n"
        return report
# Usage example
secure = SecureAutomation()

# Perform guarded operations (each one is audited)
secure.safe_click(500, 400, "登录按钮")
secure.safe_typewrite("username@example.com")
secure.safe_click(500, 450, "密码输入框")
secure.safe_typewrite("********")
secure.safe_click(500, 500, "提交按钮")

# Emit the audit report
print(secure.get_action_report())
症状: pyautogui 因权限错误而失败或捕获黑屏截图。
macOS 解决方案:
Windows 解决方案:
症状: 点击或截图未命中预期目标。
可能原因:
解决方案:
import pyautogui

# Debug: print screen geometry and current pointer position
print(f"Screen size: {pyautogui.size()}")
print(f"Mouse position: {pyautogui.position()}")

# Handle high-DPI scaling (Windows only; ctypes.windll does not exist elsewhere)
import ctypes
ctypes.windll.user32.SetProcessDPIAware()  # Windows only
症状: locateOnScreen 返回 None,即使图像可见。
常见原因:
解决方案:
import pyautogui
import cv2
import numpy as np

# Solution 1: lower the confidence threshold
location = pyautogui.locateOnScreen('button.png', confidence=0.7)  # default is 0.9

# Solution 2: multi-scale matching (see the FastImageFinder class in the performance section)
finder = FastImageFinder(scales=[0.5, 0.75, 1.0, 1.25, 1.5])
result = finder.find_multi_scale('button.png')

# Solution 3: match in grayscale
screenshot = pyautogui.screenshot()
screenshot_cv = cv2.cvtColor(np.array(screenshot), cv2.COLOR_RGB2GRAY)
template = cv2.imread('button.png', cv2.IMREAD_GRAYSCALE)
result = cv2.matchTemplate(screenshot_cv, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
if max_val >= 0.8:
    print(f"找到匹配,置信度: {max_val}")
    # A grayscale template's shape is (height, width)
    h, w = template.shape
    center_x = max_loc[0] + w // 2
    center_y = max_loc[1] + h // 2
    pyautogui.click(center_x, center_y)
症状: 操作缓慢,CPU 使用率高,或出现明显延迟。
优化策略:
减少截图频率
优化图像匹配
批量操作
详细代码示例请参见"性能优化"部分。
浏览器自动化:
游戏/图形应用程序:
受保护内容:
此技能设计用于与 nanobot 等 AI 助手配合使用。以下是集成方法:
# 示例: AI 助手使用此技能
def ai_assisted_automation(user_request):
    """Dispatch a natural-language request to an automation action.

    Args:
        user_request: the user's natural-language request.

    NOTE(review): ``parse_intent``, ``extract_button_name`` and ``datetime``
    are not defined in this snippet — the integration must supply them.
    """
    # 1. Parse the user's intent
    intent = parse_intent(user_request)

    if intent == 'screenshot':
        # 2. Take a screenshot
        screenshot = pyautogui.screenshot()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        path = f"screenshot_{timestamp}.png"
        screenshot.save(path)
        return f"已截图并保存到: {path}"
    elif intent == 'click_button':
        # 2. Find and click the named button
        button_name = extract_button_name(user_request)
        location = pyautogui.locateOnScreen(f"{button_name}.png")
        if location:
            pyautogui.click(pyautogui.center(location))
            return f"已点击按钮: {button_name}"
        else:
            return f"未找到按钮: {button_name}"
    # ... handle other intents
# Example: GitHub Actions visual-regression tests using this skill
name: Visual Regression Tests

on: [push, pull_request]

jobs:
  visual-test:
    runs-on: macos-latest # or windows-latest
    steps:
      # v3 of checkout/upload-artifact is deprecated (artifact v3 is
      # disabled on github.com); use the v4 releases.
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install pyautogui opencv-python-headless numpy Pillow
      - name: Run visual tests
        run: python tests/visual_regression.py
      - name: Upload screenshots
        uses: actions/upload-artifact@v4
        with:
          name: screenshots
          path: screenshots/
# Example: screen monitoring integrated with Prometheus/Grafana
from prometheus_client import Gauge, start_http_server
import time

import cv2          # fix: the loop below uses cv2.absdiff but it was not imported
import numpy as np  # fix: the loop below uses np.array / np.mean but it was not imported
import pyautogui

# Metric definitions
screen_change_gauge = Gauge('screen_change_score', 'Screen change detection score')
template_match_gauge = Gauge('template_match_confidence', 'Template matching confidence')

# Expose the metrics endpoint for Prometheus to scrape
start_http_server(8000)

def monitoring_loop():
    """Sample the screen every 5 s and publish change/match metrics forever."""
    baseline = None
    while True:
        # Capture the screen and score the change against the previous frame.
        current = pyautogui.screenshot()
        current_array = np.array(current)
        if baseline is not None:
            diff = cv2.absdiff(baseline, current_array)
            diff_score = np.mean(diff)
            screen_change_gauge.set(diff_score)
        baseline = current_array
        # Probe for the alert template and publish 1/0 as a gauge.
        try:
            location = pyautogui.locateOnScreen('alert_icon.png', confidence=0.8)
            if location:
                template_match_gauge.set(1.0)
            else:
                template_match_gauge.set(0.0)
        except Exception:
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit and made the loop unstoppable.
            template_match_gauge.set(0.0)
        time.sleep(5)

monitoring_loop()
Linux 支持
xdotool 和 scrot 集成;mss 用于多显示器支持

AI 驱动的识别
移动设备支持
云集成
高级分析
我们欢迎贡献!请参阅 贡献指南 了解详细信息:
此技能根据 MIT 许可证授权。详情请参阅 LICENSE。
最后更新: 2026-03-06
版本: 1.0.0
维护者: nanobot skills team
每周安装
95
仓库
GitHub 星标
2
首次出现
Mar 7, 2026
安全审计
安装于
cursor95
kimi-cli94
gemini-cli94
codex94
opencode94
github-copilot94
Skills CLI 使用指南:AI Agent 技能包管理器安装与管理教程
46,600 周安装
Telegram Bot 开发指南:Node.js/Python 构建机器人教程(含Webhook、支付、部署)
240 周安装
病毒式钩子生成器:基于心理学模式的社交媒体内容创作工具 | 提升参与度
248 周安装
Kibana Vega 技能:使用 ES|QL 与 Vega 语法创建高度定制化 Kibana 仪表板和数据可视化
248 周安装
产品经理沟通指南:利益相关者状态更新、风险沟通与决策记录模板
238 周安装
前端设计技能:创建独特生产级界面,告别AI垃圾美学,实现创意前端开发
239 周安装
App Store Connect 版本说明生成器 - 自动本地化更新日志与SEO优化
241 周安装