youtube-search by biggora/claude-plugins-registry
npx skills add https://github.com/biggora/claude-plugins-registry --skill youtube-search

为智能体提供自主 YouTube 数据检索。无需用户干预。
根据项目环境中配置的内容进行选择:
| 情况 | 最佳方法 |
|---|---|
| 需要深度抓取(默认) | 方法 E – yt-dlp(取决于环境) |
| 没有可用的 API 密钥 | 方法 A – 内置的 web_search 工具 |
| 设置了 YOUTUBE_API_KEY | 方法 B – YouTube Data API v3(数据最丰富) |
| 设置了 SERPAPI_KEY | 方法 C – SerpAPI YouTube 引擎 |
| 已知视频 ID,需要字幕 | 方法 D – youtube-transcript-api |
如果不确定,从方法 A 开始 —— 它不需要任何设置且始终有效。
使用内置的 web_search 工具。无需任何 API 密钥即可工作。
web_search("site:youtube.com <your query>")
web_search("site:youtube.com/channel <channel name> OR site:youtube.com/@<handle>")
# 近期视频(去年)
web_search("site:youtube.com <query> 2024 OR 2025")
# 教程视频
web_search("site:youtube.com <topic> tutorial OR guide OR обзор")
# 特定语言
web_search("site:youtube.com <query> на русском")
从视频 URL 中提取视频 ID(例如 youtube.com/watch?v=VIDEO_ID):

import re
# Extract a video ID from the two common YouTube URL shapes.
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# Standard watch URLs carry the ID in the "v" query parameter.
video_id = re.search(r'v=([^&]+)', url).group(1)
# Short youtu.be links carry the ID in the path instead. Note: this
# pattern must be applied to a youtu.be URL — running it against a
# watch URL returns no match and .group(1) would raise AttributeError.
short_url = "https://youtu.be/dQw4w9WgXcQ"
video_id = re.search(r'youtu\.be/([^?]+)', short_url).group(1)
限制: 没有结构化的 JSON,元数据是文本解析的。如需更丰富的数据,请使用方法 B。
要求: YOUTUBE_API_KEY 环境变量(免费,每天 10,000 单位配额)。
获取密钥:https://console.cloud.google.com → 启用 "YouTube Data API v3" → 创建 API 密钥。
import requests, os
API_KEY = os.environ.get("YOUTUBE_API_KEY")
BASE = "https://www.googleapis.com/youtube/v3"
def youtube_search(query, max_results=10, order="relevance",
                   video_duration=None, published_after=None, lang=None):
    """Search YouTube videos via the Data API v3 search endpoint.

    Each call costs 100 quota units.

    Args:
        query: free-text search query.
        max_results: number of results to request (the API caps this at 50).
        order: relevance | date | viewCount | rating | title
        video_duration: short (<4min) | medium (4-20min) | long (>20min)
        published_after: ISO 8601 timestamp, e.g. "2024-01-01T00:00:00Z"
        lang: ISO 639-1 relevance-language hint, e.g. "ru", "en"

    Returns:
        List of dicts: video_id, title, channel, channel_id, description,
        published_at, thumbnail, url.

    Raises:
        requests.HTTPError: on non-2xx API responses (bad key, quota, ...).
    """
    params = {
        "part": "snippet",
        "q": query,
        "maxResults": max_results,
        "type": "video",
        "order": order,
        "key": API_KEY,
    }
    # Optional filters are only sent when provided.
    if video_duration:
        params["videoDuration"] = video_duration
    if published_after:
        params["publishedAfter"] = published_after
    if lang:
        params["relevanceLanguage"] = lang
    # An explicit timeout keeps an unresponsive network from hanging
    # the agent forever (requests has no default timeout).
    r = requests.get(f"{BASE}/search", params=params, timeout=30)
    r.raise_for_status()
    items = r.json().get("items", [])
    # Guard on videoId: non-video items can slip into search results.
    return [{
        "video_id": item["id"]["videoId"],
        "title": item["snippet"]["title"],
        "channel": item["snippet"]["channelTitle"],
        "channel_id": item["snippet"]["channelId"],
        "description": item["snippet"]["description"],
        "published_at": item["snippet"]["publishedAt"],
        "thumbnail": item["snippet"]["thumbnails"]["high"]["url"],
        "url": f"https://youtube.com/watch?v={item['id']['videoId']}"
    } for item in items if item["id"].get("videoId")]
def get_video_stats(video_ids: list):
    """Fetch statistics for up to 50 video IDs in one call (1 quota unit).

    Args:
        video_ids: list of YouTube video IDs. Only the first 50 are sent —
            the videos.list endpoint caps batches at 50, so chunk larger
            lists and call repeatedly.

    Returns:
        List of dicts with views/likes/comments/duration_iso/tags per video.

    Raises:
        requests.HTTPError: on non-2xx API responses.
    """
    if not video_ids:
        return []  # avoid a pointless API round-trip on empty input
    ids = ",".join(video_ids[:50])  # max 50 per request
    params = {
        "part": "statistics,contentDetails,snippet",
        "id": ids,
        "key": API_KEY,
    }
    # Explicit timeout so a stalled connection cannot hang the agent.
    r = requests.get(f"{BASE}/videos", params=params, timeout=30)
    r.raise_for_status()
    results = []
    for item in r.json().get("items", []):
        stats = item.get("statistics", {})
        content = item.get("contentDetails", {})
        results.append({
            "video_id": item["id"],
            "title": item["snippet"]["title"],
            # Counts may be absent (e.g. hidden like counts) — default to 0.
            "views": int(stats.get("viewCount", 0)),
            "likes": int(stats.get("likeCount", 0)),
            "comments": int(stats.get("commentCount", 0)),
            "duration_iso": content.get("duration"),  # e.g. "PT5M30S"
            "tags": item["snippet"].get("tags", []),
        })
    return results
import re

def parse_duration(iso_duration):
    """Convert an ISO-8601 duration (e.g. "PT5M30S") to total seconds.

    Handles the H/M/S components that YouTube's contentDetails.duration
    uses. Returns 0 for None, empty, or unparseable input — the API omits
    the field for some items, so a missing value must not crash the caller.
    """
    if not iso_duration:
        return 0
    match = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', iso_duration)
    if not match:
        return 0
    hours, minutes, seconds = (int(part or 0) for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds
def search_channel(channel_name, max_results=5):
    """Search channels by name and return each with its statistics.

    Costs 100 quota units (search.list) + 1 unit (channels.list).

    Returns:
        List of dicts: channel_id, name, subscribers, total_views,
        video_count, url. Empty list when nothing matches.

    Raises:
        requests.HTTPError: on non-2xx API responses.
    """
    params = {
        "part": "snippet",
        "q": channel_name,
        "type": "channel",
        "maxResults": max_results,
        "key": API_KEY,
    }
    r = requests.get(f"{BASE}/search", params=params, timeout=30)
    # Surface quota/key errors here instead of a confusing KeyError below.
    r.raise_for_status()
    channel_ids = [item["id"]["channelId"] for item in r.json().get("items", [])]
    if not channel_ids:
        return []  # no matches — skip the second API call entirely
    # Enrich with subscriber/view/video counts in one batched call.
    params2 = {"part": "statistics,snippet", "id": ",".join(channel_ids), "key": API_KEY}
    r2 = requests.get(f"{BASE}/channels", params=params2, timeout=30)
    r2.raise_for_status()
    return [{
        "channel_id": ch["id"],
        "name": ch["snippet"]["title"],
        # subscriberCount can be hidden by the channel owner — default 0.
        "subscribers": int(ch["statistics"].get("subscriberCount", 0)),
        "total_views": int(ch["statistics"].get("viewCount", 0)),
        "video_count": int(ch["statistics"].get("videoCount", 0)),
        "url": f"https://youtube.com/channel/{ch['id']}"
    } for ch in r2.json().get("items", [])]
| 操作 | 成本 |
|---|---|
| search.list | 100 单位 |
| videos.list (stats) | 1 单位 |
| channels.list | 1 单位 |
| playlists.list | 1 单位 |
提示: 搜索 = 100 单位。获取 50 个视频的统计数据 = 1 单位。始终批量调用 videos.list。
要求: SERPAPI_KEY 环境变量。
免费层级:每月 100 次搜索。提供付费计划。
import requests, os
def serpapi_youtube_search(query, max_results=10, lang="ru"):
    """Search YouTube through SerpAPI's YouTube engine.

    Returns views/duration/publish date directly in the search response,
    so no follow-up API calls are needed.

    Args:
        query: search query string.
        max_results: cap on returned items (SerpAPI returns a full page).
        lang: interface-language code, passed as "hl".

    Raises:
        requests.HTTPError: on non-2xx SerpAPI responses.
    """
    params = {
        "engine": "youtube",
        "search_query": query,
        "api_key": os.environ.get("SERPAPI_KEY"),
        "hl": lang,  # interface language
    }
    # Explicit timeout so a stalled endpoint cannot hang the agent.
    r = requests.get("https://serpapi.com/search", params=params, timeout=30)
    r.raise_for_status()
    results = []
    for item in r.json().get("video_results", [])[:max_results]:
        results.append({
            "title": item.get("title"),
            # Some result shapes omit "id" — fall back to parsing the link.
            "video_id": item.get("id") or item.get("link", "").split("v=")[-1],
            "url": item.get("link"),
            "channel": item.get("channel", {}).get("name"),
            "views": item.get("views"),
            "duration": item.get("length"),
            "published": item.get("published_date"),
            "description": item.get("description"),
            "thumbnail": item.get("thumbnail", {}).get("static"),
        })
    return results
相较于 YouTube API 的优势: 直接从搜索返回观看次数、时长、发布日期 —— 无需额外的 API 调用。
要求: pip install youtube-transcript-api --break-system-packages
无需 API 密钥。
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def get_transcript(video_id, languages=None):
    """Fetch a video's transcript, preferring *languages* in order.

    Falls back to any available transcript (generated ones first) when no
    preferred language exists. Returns a dict with the full text and timed
    entries, or {"error": ...} on failure — never raises.
    """
    if languages is None:  # avoid a mutable default argument
        languages = ["ru", "en"]
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        try:
            # Preferred languages first (manual or generated).
            transcript = transcript_list.find_transcript(languages)
        except NoTranscriptFound:
            # Fall back via the public iteration API rather than the
            # private _generated_transcripts attribute: prefer a
            # generated transcript, else take whatever exists.
            transcript = next(
                (t for t in transcript_list if t.is_generated), None
            )
            if transcript is None:
                transcript = next(iter(transcript_list))
        entries = transcript.fetch()
        # NOTE(review): entries are assumed to be {"text", "start",
        # "duration"} dicts; newer youtube-transcript-api releases return
        # snippet objects instead — confirm against the installed version.
        full_text = " ".join(e["text"] for e in entries)
        return {
            "video_id": video_id,
            "language": transcript.language_code,
            "is_generated": transcript.is_generated,
            "text": full_text,
            "entries": entries,  # list of {text, start, duration}
        }
    except TranscriptsDisabled:
        return {"error": "此视频的字幕已禁用"}
    except Exception as e:
        # Best-effort contract: report any other failure as data.
        return {"error": str(e)}
def get_available_languages(video_id):
    """Return every transcript language offered for *video_id*."""
    available = YouTubeTranscriptApi.list_transcripts(video_id)
    languages = []
    for track in available:
        languages.append({
            "code": track.language_code,
            "name": track.language,
            "generated": track.is_generated,
        })
    return languages
使用场景: 通过方法 A 或 B 找到视频 ID 后,提取全文内容用于分析、摘要或内容研究。
要求: pip install yt-dlp --break-system-packages
无需 API 密钥。在沙盒环境中可能被阻止 —— 请先测试。
import yt_dlp, json
def ytdlp_search(query, max_results=10):
    """Search YouTube via yt-dlp's ytsearch pseudo-URL; rich metadata."""
    options = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(
            f"ytsearch{max_results}:{query}", download=False
        )
    videos = []
    for entry in info.get("entries", []):
        if not entry:
            continue
        videos.append({
            "title": entry.get("title"),
            "video_id": entry.get("id"),
            "url": f"https://youtube.com/watch?v={entry.get('id')}",
            "duration": entry.get("duration"),
            "view_count": entry.get("view_count"),
            "channel": entry.get("channel"),
            "upload_date": entry.get("upload_date"),
        })
    return videos
def ytdlp_get_video_info(video_url):
    """Return yt-dlp's full metadata dict for one video, without downloading."""
    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as downloader:
        return downloader.extract_info(video_url, download=False)
def ytdlp_get_subtitles(video_url, lang="ru"):
    """Download and return subtitle text (.vtt/.srt) for *video_url*.

    Prefers *lang*, falling back to English; auto-generated subtitles are
    accepted. Returns None when no subtitle file was produced.
    """
    import tempfile, os
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            "quiet": True,
            "writesubtitles": True,
            "writeautomaticsub": True,  # accept auto-generated captions too
            "subtitleslangs": [lang, "en"],
            "skip_download": True,  # fetch only the subtitle files
            "outtmpl": f"{tmpdir}/%(id)s.%(ext)s",
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
        for name in os.listdir(tmpdir):
            if name.endswith((".vtt", ".srt")):
                # Context manager closes the handle; subtitle files are
                # UTF-8, so decode explicitly instead of relying on the
                # platform default encoding.
                with open(os.path.join(tmpdir, name), encoding="utf-8") as fh:
                    return fh.read()
    return None
注意: yt-dlp 直接向 YouTube 发送请求 —— 在受限的网络环境中可能被阻止。始终先用 yt-dlp --version 快速调用进行测试。
# End-to-end workflow combining Method B (search + stats) and Method D
# (transcripts).
# 1. Search for videos
results = youtube_search("AI сервисы обзор", max_results=20,
                         order="viewCount", lang="ru")
# 2. Enrich with statistics (batched: 1 quota unit per 50 IDs)
video_ids = [v["video_id"] for v in results]
stats = get_video_stats(video_ids)
# 3. Fetch transcripts for the most-viewed videos
top_videos = sorted(stats, key=lambda x: x["views"], reverse=True)[:5]
for v in top_videos:
    transcript = get_transcript(v["video_id"], languages=["ru"])
    # analyze, summarize, extract keywords...
# 无需设置 - 使用内置的 web_search 工具
# web_search("site:youtube.com AI сервисы обзор 2025")
# 解析结果,提取视频 ID,然后根据需要使用的字幕 API
# Find a channel by name (Method B; the search costs 100 quota units)
channels = search_channel("название канала")
channel_id = channels[0]["channel_id"]
# Get the channel's latest videos via search.list filtered by channelId
params = {
    "part": "snippet",
    "channelId": channel_id,
    "order": "date",
    "maxResults": 10,
    "type": "video",
    "key": API_KEY,
}
r = requests.get(f"{BASE}/search", params=params)
# Required for Method B (YouTube Data API)
export YOUTUBE_API_KEY="AIza..."
# Required for Method C (SerpAPI)
export SERPAPI_KEY="..."
# Required for Methods D & E (Python libraries)
pip install youtube-transcript-api yt-dlp --break-system-packages
references/youtube-api-quota.md — 配额优化策略
references/parsing-examples.md — 俄语内容的真实解析示例

每周安装次数
1
仓库
GitHub 星标数
1
首次出现
1 天前
安全审计
安装于
junie1
amp1
cline1
opencode1
cursor1
kimi-cli1
Autonomous YouTube data retrieval for agents. No user intervention required.
Choose based on what's configured in the project environment:
| Situation | Best Method |
|---|---|
| Deep scraping needed (default) | Method E – yt-dlp (environment-dependent) |
| No API keys available | Method A – web_search built-in tool |
| YOUTUBE_API_KEY set | Method B – YouTube Data API v3 (richest data) |
| SERPAPI_KEY set | Method C – SerpAPI YouTube engine |
| Video ID known, need transcript | Method D – youtube-transcript-api |
Start with Method A if you're unsure — it requires nothing and always works.
Use the built-in web_search tool. Works without any API keys.
web_search("site:youtube.com <your query>")
web_search("site:youtube.com/channel <channel name> OR site:youtube.com/@<handle>")
# Recent videos (last year)
web_search("site:youtube.com <query> 2024 OR 2025")
# Tutorial videos
web_search("site:youtube.com <topic> tutorial OR guide OR обзор")
# Specific language
web_search("site:youtube.com <query> на русском")
Extract the video ID from a video URL (e.g. youtube.com/watch?v=VIDEO_ID):

import re
# Extract a video ID from the two common YouTube URL shapes.
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# Standard watch URLs carry the ID in the "v" query parameter.
video_id = re.search(r'v=([^&]+)', url).group(1)
# Short youtu.be links carry the ID in the path instead. Note: this
# pattern must be applied to a youtu.be URL — running it against a
# watch URL returns no match and .group(1) would raise AttributeError.
short_url = "https://youtu.be/dQw4w9WgXcQ"
video_id = re.search(r'youtu\.be/([^?]+)', short_url).group(1)
Limitation: No structured JSON, metadata is text-parsed. For richer data, use Method B.
Requires: YOUTUBE_API_KEY environment variable (free, 10,000 units/day quota).
Get key: https://console.cloud.google.com → Enable "YouTube Data API v3" → Create API key.
import requests, os
API_KEY = os.environ.get("YOUTUBE_API_KEY")
BASE = "https://www.googleapis.com/youtube/v3"
def youtube_search(query, max_results=10, order="relevance",
                   video_duration=None, published_after=None, lang=None):
    """Search YouTube videos via the Data API v3 search endpoint.

    Each call costs 100 quota units.

    Args:
        query: free-text search query.
        max_results: number of results to request (the API caps this at 50).
        order: relevance | date | viewCount | rating | title
        video_duration: short (<4min) | medium (4-20min) | long (>20min)
        published_after: ISO 8601 timestamp, e.g. "2024-01-01T00:00:00Z"
        lang: ISO 639-1 relevance-language hint, e.g. "ru", "en"

    Returns:
        List of dicts: video_id, title, channel, channel_id, description,
        published_at, thumbnail, url.

    Raises:
        requests.HTTPError: on non-2xx API responses (bad key, quota, ...).
    """
    params = {
        "part": "snippet",
        "q": query,
        "maxResults": max_results,
        "type": "video",
        "order": order,
        "key": API_KEY,
    }
    # Optional filters are only sent when provided.
    if video_duration:
        params["videoDuration"] = video_duration
    if published_after:
        params["publishedAfter"] = published_after
    if lang:
        params["relevanceLanguage"] = lang
    # An explicit timeout keeps an unresponsive network from hanging
    # the agent forever (requests has no default timeout).
    r = requests.get(f"{BASE}/search", params=params, timeout=30)
    r.raise_for_status()
    items = r.json().get("items", [])
    # Guard on videoId: non-video items can slip into search results.
    return [{
        "video_id": item["id"]["videoId"],
        "title": item["snippet"]["title"],
        "channel": item["snippet"]["channelTitle"],
        "channel_id": item["snippet"]["channelId"],
        "description": item["snippet"]["description"],
        "published_at": item["snippet"]["publishedAt"],
        "thumbnail": item["snippet"]["thumbnails"]["high"]["url"],
        "url": f"https://youtube.com/watch?v={item['id']['videoId']}"
    } for item in items if item["id"].get("videoId")]
def get_video_stats(video_ids: list):
    """Fetch statistics for up to 50 video IDs in one call (1 quota unit).

    Args:
        video_ids: list of YouTube video IDs. Only the first 50 are sent —
            the videos.list endpoint caps batches at 50, so chunk larger
            lists and call repeatedly.

    Returns:
        List of dicts with views/likes/comments/duration_iso/tags per video.

    Raises:
        requests.HTTPError: on non-2xx API responses.
    """
    if not video_ids:
        return []  # avoid a pointless API round-trip on empty input
    ids = ",".join(video_ids[:50])  # max 50 per request
    params = {
        "part": "statistics,contentDetails,snippet",
        "id": ids,
        "key": API_KEY,
    }
    # Explicit timeout so a stalled connection cannot hang the agent.
    r = requests.get(f"{BASE}/videos", params=params, timeout=30)
    r.raise_for_status()
    results = []
    for item in r.json().get("items", []):
        stats = item.get("statistics", {})
        content = item.get("contentDetails", {})
        results.append({
            "video_id": item["id"],
            "title": item["snippet"]["title"],
            # Counts may be absent (e.g. hidden like counts) — default to 0.
            "views": int(stats.get("viewCount", 0)),
            "likes": int(stats.get("likeCount", 0)),
            "comments": int(stats.get("commentCount", 0)),
            "duration_iso": content.get("duration"),  # e.g. "PT5M30S"
            "tags": item["snippet"].get("tags", []),
        })
    return results
import re
def parse_duration(iso_duration):
    """Convert an ISO-8601 duration (e.g. "PT5M30S") to total seconds.

    Handles the H/M/S components that YouTube's contentDetails.duration
    uses. Returns 0 for None, empty, or unparseable input — the API omits
    the field for some items, so a missing value must not crash the caller.
    """
    if not iso_duration:
        return 0
    match = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', iso_duration)
    if not match:
        return 0
    hours, minutes, seconds = (int(part or 0) for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds
def search_channel(channel_name, max_results=5):
    """Search channels by name and return each with its statistics.

    Costs 100 quota units (search.list) + 1 unit (channels.list).

    Returns:
        List of dicts: channel_id, name, subscribers, total_views,
        video_count, url. Empty list when nothing matches.

    Raises:
        requests.HTTPError: on non-2xx API responses.
    """
    params = {
        "part": "snippet",
        "q": channel_name,
        "type": "channel",
        "maxResults": max_results,
        "key": API_KEY,
    }
    r = requests.get(f"{BASE}/search", params=params, timeout=30)
    # Surface quota/key errors here instead of a confusing KeyError below.
    r.raise_for_status()
    channel_ids = [item["id"]["channelId"] for item in r.json().get("items", [])]
    if not channel_ids:
        return []  # no matches — skip the second API call entirely
    # Enrich with subscriber/view/video counts in one batched call.
    params2 = {"part": "statistics,snippet", "id": ",".join(channel_ids), "key": API_KEY}
    r2 = requests.get(f"{BASE}/channels", params=params2, timeout=30)
    r2.raise_for_status()
    return [{
        "channel_id": ch["id"],
        "name": ch["snippet"]["title"],
        # subscriberCount can be hidden by the channel owner — default 0.
        "subscribers": int(ch["statistics"].get("subscriberCount", 0)),
        "total_views": int(ch["statistics"].get("viewCount", 0)),
        "video_count": int(ch["statistics"].get("videoCount", 0)),
        "url": f"https://youtube.com/channel/{ch['id']}"
    } for ch in r2.json().get("items", [])]
| Operation | Cost |
|---|---|
| search.list | 100 units |
| videos.list (stats) | 1 unit |
| channels.list | 1 unit |
| playlists.list | 1 unit |
Tip: Search = 100 units. Get stats for 50 videos = 1 unit. Always batch videos.list calls.
Requires: SERPAPI_KEY environment variable.
Free tier: 100 searches/month. Paid plans available.
import requests, os
def serpapi_youtube_search(query, max_results=10, lang="ru"):
    """Search YouTube through SerpAPI's YouTube engine.

    Returns views/duration/publish date directly in the search response,
    so no follow-up API calls are needed.

    Args:
        query: search query string.
        max_results: cap on returned items (SerpAPI returns a full page).
        lang: interface-language code, passed as "hl".

    Raises:
        requests.HTTPError: on non-2xx SerpAPI responses.
    """
    params = {
        "engine": "youtube",
        "search_query": query,
        "api_key": os.environ.get("SERPAPI_KEY"),
        "hl": lang,  # interface language
    }
    # Explicit timeout so a stalled endpoint cannot hang the agent.
    r = requests.get("https://serpapi.com/search", params=params, timeout=30)
    r.raise_for_status()
    results = []
    for item in r.json().get("video_results", [])[:max_results]:
        results.append({
            "title": item.get("title"),
            # Some result shapes omit "id" — fall back to parsing the link.
            "video_id": item.get("id") or item.get("link", "").split("v=")[-1],
            "url": item.get("link"),
            "channel": item.get("channel", {}).get("name"),
            "views": item.get("views"),
            "duration": item.get("length"),
            "published": item.get("published_date"),
            "description": item.get("description"),
            "thumbnail": item.get("thumbnail", {}).get("static"),
        })
    return results
Advantage over YouTube API: Returns views, duration, publish date directly from search — no extra API calls needed.
Requires: pip install youtube-transcript-api --break-system-packages
No API key needed.
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
def get_transcript(video_id, languages=None):
    """Fetch a video's transcript, preferring *languages* in order.

    Falls back to any available transcript (generated ones first) when no
    preferred language exists. Returns a dict with the full text and timed
    entries, or {"error": ...} on failure — never raises.
    """
    if languages is None:  # avoid a mutable default argument
        languages = ["ru", "en"]
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        try:
            # Preferred languages first (manual or generated).
            transcript = transcript_list.find_transcript(languages)
        except NoTranscriptFound:
            # Fall back via the public iteration API rather than the
            # private _generated_transcripts attribute: prefer a
            # generated transcript, else take whatever exists.
            transcript = next(
                (t for t in transcript_list if t.is_generated), None
            )
            if transcript is None:
                transcript = next(iter(transcript_list))
        entries = transcript.fetch()
        # NOTE(review): entries are assumed to be {"text", "start",
        # "duration"} dicts; newer youtube-transcript-api releases return
        # snippet objects instead — confirm against the installed version.
        full_text = " ".join(e["text"] for e in entries)
        return {
            "video_id": video_id,
            "language": transcript.language_code,
            "is_generated": transcript.is_generated,
            "text": full_text,
            "entries": entries,  # list of {text, start, duration}
        }
    except TranscriptsDisabled:
        return {"error": "Transcripts disabled for this video"}
    except Exception as e:
        # Best-effort contract: report any other failure as data.
        return {"error": str(e)}
def get_available_languages(video_id):
    """Return every transcript language offered for *video_id*."""
    available = YouTubeTranscriptApi.list_transcripts(video_id)
    languages = []
    for track in available:
        languages.append({
            "code": track.language_code,
            "name": track.language,
            "generated": track.is_generated,
        })
    return languages
Use case: After finding video IDs via Method A or B, extract full text content for analysis, summarization, or content research.
Requires: pip install yt-dlp --break-system-packages
No API key. May be blocked in sandboxed environments — test first.
import yt_dlp, json
def ytdlp_search(query, max_results=10):
    """Search YouTube via yt-dlp's ytsearch pseudo-URL; rich metadata."""
    options = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(
            f"ytsearch{max_results}:{query}", download=False
        )
    videos = []
    for entry in info.get("entries", []):
        if not entry:
            continue
        videos.append({
            "title": entry.get("title"),
            "video_id": entry.get("id"),
            "url": f"https://youtube.com/watch?v={entry.get('id')}",
            "duration": entry.get("duration"),
            "view_count": entry.get("view_count"),
            "channel": entry.get("channel"),
            "upload_date": entry.get("upload_date"),
        })
    return videos
def ytdlp_get_video_info(video_url):
    """Return yt-dlp's full metadata dict for one video, without downloading."""
    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as downloader:
        return downloader.extract_info(video_url, download=False)
def ytdlp_get_subtitles(video_url, lang="ru"):
    """Download and return subtitle text (.vtt/.srt) for *video_url*.

    Prefers *lang*, falling back to English; auto-generated subtitles are
    accepted. Returns None when no subtitle file was produced.
    """
    import tempfile, os
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            "quiet": True,
            "writesubtitles": True,
            "writeautomaticsub": True,  # accept auto-generated captions too
            "subtitleslangs": [lang, "en"],
            "skip_download": True,  # fetch only the subtitle files
            "outtmpl": f"{tmpdir}/%(id)s.%(ext)s",
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
        for name in os.listdir(tmpdir):
            if name.endswith((".vtt", ".srt")):
                # Context manager closes the handle; subtitle files are
                # UTF-8, so decode explicitly instead of relying on the
                # platform default encoding.
                with open(os.path.join(tmpdir, name), encoding="utf-8") as fh:
                    return fh.read()
    return None
Note: yt-dlp makes direct requests to YouTube — may be blocked in restricted network environments. Always test with a quick yt-dlp --version call first.
# End-to-end workflow combining Method B (search + stats) and Method D
# (transcripts).
# 1. Search for videos
results = youtube_search("AI сервисы обзор", max_results=20,
                         order="viewCount", lang="ru")
# 2. Enrich with stats (batched: 1 quota unit per 50 IDs)
video_ids = [v["video_id"] for v in results]
stats = get_video_stats(video_ids)
# 3. Get transcripts for top videos
top_videos = sorted(stats, key=lambda x: x["views"], reverse=True)[:5]
for v in top_videos:
    transcript = get_transcript(v["video_id"], languages=["ru"])
    # analyze, summarize, extract keywords...
# No setup required - use built-in web_search tool
# web_search("site:youtube.com AI сервисы обзор 2025")
# Parse results, extract video IDs, then use transcript API if needed
# Find channel (Method B; the search costs 100 quota units)
channels = search_channel("название канала")
channel_id = channels[0]["channel_id"]
# Get latest videos from channel via search.list filtered by channelId
params = {
    "part": "snippet",
    "channelId": channel_id,
    "order": "date",
    "maxResults": 10,
    "type": "video",
    "key": API_KEY,
}
r = requests.get(f"{BASE}/search", params=params)
# Required for Method B (YouTube Data API)
export YOUTUBE_API_KEY="AIza..."
# Required for Method C (SerpAPI)
export SERPAPI_KEY="..."
# Required for Methods D & E (Python libraries)
pip install youtube-transcript-api yt-dlp --break-system-packages
references/youtube-api-quota.md — Quota optimization strategies
references/parsing-examples.md — Real-world parsing examples for Russian-language content

Weekly Installs
1
Repository
GitHub Stars
1
First Seen
1 day ago
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Warn
Installed on
junie1
amp1
cline1
opencode1
cursor1
kimi-cli1
Skills CLI 使用指南:AI Agent 技能包管理器安装与管理教程
31,600 周安装