openalex-database by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill openalex-database

OpenAlex 是一个全面的开放目录,包含超过 2.4 亿的学术成果、作者、机构、主题、来源、出版商和资助者。此技能提供了查询 OpenAlex API 的工具和工作流,用于搜索文献、分析研究成果、追踪引用和进行文献计量研究。
始终使用电子邮件地址初始化客户端以访问礼貌池(10 倍速率限制提升):
from scripts.openalex_client import OpenAlexClient
client = OpenAlexClient(email="your-email@example.edu")
使用 uv 安装所需的包:
uv pip install requests
无需 API 密钥 - OpenAlex 完全开放。
用途:通过标题、摘要或主题查找论文
# 简单搜索
results = client.search_works(
search="machine learning",
per_page=100
)
# 带过滤器的搜索
results = client.search_works(
search="CRISPR gene editing",
filter_params={
"publication_year": ">2020",
"is_oa": "true"
},
sort="cited_by_count:desc"
)
用途:获取特定研究人员的所有出版物
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
使用两步模式(实体名称 → ID → 作品):
from scripts.query_helpers import find_author_works
works = find_author_works(
author_name="Jennifer Doudna",
client=client,
limit=100
)
手动两步方法:
# 步骤 1: 获取作者 ID
author_response = client._make_request(
'/authors',
params={'search': 'Jennifer Doudna', 'per-page': 1}
)
author_id = author_response['results'][0]['id'].split('/')[-1]
# 步骤 2: 获取作品
works = client.search_works(
filter_params={"authorships.author.id": author_id}
)
用途:分析大学或组织的研究成果
from scripts.query_helpers import find_institution_works
works = find_institution_works(
institution_name="Stanford University",
client=client,
limit=200
)
用途:查找某个领域内有影响力的论文
from scripts.query_helpers import find_highly_cited_recent_papers
papers = find_highly_cited_recent_papers(
topic="quantum computing",
years=">2020",
client=client,
limit=100
)
用途:查找可免费获取的研究
from scripts.query_helpers import get_open_access_papers
papers = get_open_access_papers(
search_term="climate change",
client=client,
oa_status="any", # 或 "gold", "green", "hybrid", "bronze"
limit=200
)
用途:追踪随时间变化的研究成果
from scripts.query_helpers import get_publication_trends
trends = get_publication_trends(
search_term="artificial intelligence",
filter_params={"is_oa": "true"},
client=client
)
# 排序并显示
for trend in sorted(trends, key=lambda x: x['key'])[-10:]:
print(f"{trend['key']}: {trend['count']} publications")
用途:对作者或机构的研究进行全面分析
from scripts.query_helpers import analyze_research_output
analysis = analyze_research_output(
entity_type='institution', # 或 'author'
entity_name='MIT',
client=client,
years='>2020'
)
print(f"Total works: {analysis['total_works']}")
print(f"Open access: {analysis['open_access_percentage']}%")
print(f"Top topics: {analysis['top_topics'][:5]}")
用途:高效获取多个 DOI、ORCID 或 ID 的信息
dois = [
"https://doi.org/10.1038/s41586-021-03819-2",
"https://doi.org/10.1126/science.abc1234",
# ... 最多 50 个 DOI
]
works = client.batch_lookup(
entity_type='works',
ids=dois,
id_field='doi'
)
用途:获取用于分析的代表性样本
# 小样本
works = client.sample_works(
sample_size=100,
seed=42, # 为了可复现性
filter_params={"publication_year": "2023"}
)
# 大样本 (>10k) - 自动处理多个请求
works = client.sample_works(
sample_size=25000,
seed=42,
filter_params={"is_oa": "true"}
)
用途:查找引用特定作品的论文
# 获取作品
work = client.get_entity('works', 'https://doi.org/10.1038/s41586-021-03819-2')
# 使用 cited_by_api_url 获取引用论文
import requests
citing_response = requests.get(
work['cited_by_api_url'],
params={'mailto': client.email, 'per-page': 200}
)
citing_works = citing_response.json()['results']
用途:理解研究重点领域
# 获取机构的热门主题
topics = client.group_by(
entity_type='works',
group_field='topics.id',
filter_params={
"authorships.institutions.id": "I136199984", # MIT
"publication_year": ">2020"
}
)
for topic in topics[:10]:
print(f"{topic['key_display_name']}: {topic['count']} works")
用途:下载大型数据集进行分析
# 对所有结果进行分页
all_papers = client.paginate_all(
endpoint='/works',
params={
'search': 'synthetic biology',
'filter': 'publication_year:2020-2024'
},
max_results=10000
)
# 导出到 CSV
import csv
with open('papers.csv', 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Title', 'Year', 'Citations', 'DOI', 'OA Status'])
for paper in all_papers:
writer.writerow([
paper.get('title', 'N/A'),
paper.get('publication_year', 'N/A'),
paper.get('cited_by_count', 0),
paper.get('doi', 'N/A'),
paper.get('open_access', {}).get('oa_status', 'closed')
])
添加电子邮件以获得 10 倍速率限制(1 次请求/秒 → 10 次请求/秒):
client = OpenAlexClient(email="your-email@example.edu")
切勿直接按实体名称过滤 - 始终先获取 ID:
# ✅ 正确
# 1. 搜索实体 → 获取 ID
# 2. 按 ID 过滤
# ❌ 错误
# filter=author_name:Einstein # 这不起作用!
始终使用 per-page=200 以高效检索数据:
results = client.search_works(search="topic", per_page=200)
对多个 ID 使用 batch_lookup(),而不是单独的请求:
# ✅ 正确 - 1 个请求处理 50 个 DOI
works = client.batch_lookup('works', doi_list, 'doi')
# ❌ 错误 - 50 个单独的请求
for doi in doi_list:
work = client.get_entity('works', doi)
使用带种子的 sample_works() 进行可复现的随机抽样:
# ✅ 正确
works = client.sample_works(sample_size=100, seed=42)
# ❌ 错误 - 随机页码会偏差结果
# 使用随机页码无法获得真正的随机样本
通过选择特定字段来减少响应大小:
results = client.search_works(
search="topic",
select=['id', 'title', 'publication_year', 'cited_by_count']
)
# 单一年份
filter_params={"publication_year": "2023"}
# 某年之后
filter_params={"publication_year": ">2020"}
# 范围
filter_params={"publication_year": "2020-2024"}
# 所有条件必须匹配
filter_params={
"publication_year": ">2020",
"is_oa": "true",
"cited_by_count": ">100"
}
# 任何机构匹配
filter_params={
"authorships.institutions.id": "I136199984|I27837315" # MIT 或 Harvard
}
# 作者来自两个机构的论文
filter_params={
"authorships.institutions.id": "I136199984+I27837315" # MIT 和 Harvard
}
# 排除类型
filter_params={
"type": "!paratext"
}
OpenAlex 提供以下实体类型:
使用一致的模式访问任何实体类型:
client.search_works(...)
client.get_entity('authors', author_id)
client.group_by('works', 'topics.id', filter_params={...})
直接使用外部标识符:
# 作品的 DOI
work = client.get_entity('works', 'https://doi.org/10.7717/peerj.4375')
# 作者的 ORCID
author = client.get_entity('authors', 'https://orcid.org/0000-0003-1613-5981')
# 机构的 ROR
institution = client.get_entity('institutions', 'https://ror.org/02y3ad647')
# 来源的 ISSN
source = client.get_entity('sources', 'issn:0028-0836')
查看 references/api_guide.md 了解:
查看 references/common_queries.md 了解:
主要的 API 客户端,包含:
用于具有完全控制的直接 API 访问。
用于常见操作的高级辅助函数:
find_author_works() - 获取作者论文
find_institution_works() - 获取机构论文
find_highly_cited_recent_papers() - 获取有影响力的论文
get_open_access_papers() - 查找 OA 出版物
get_publication_trends() - 分析随时间变化的趋势
analyze_research_output() - 全面分析

用于具有简化接口的常见研究查询。
如果遇到 403 错误:
如果搜索未返回结果:
(参见 references/api_guide.md)

对于大型查询:
- 使用 per-page=200 进行分页
- 使用 select= 限制返回的字段

始终通过向客户端提供电子邮件,在生产工作流中使用礼貌池。
每周安装数
190
代码仓库
GitHub 星标数
23.4K
首次出现
2026 年 1 月 21 日
安全审计
安装于
opencode: 156
claude-code: 151
gemini-cli: 146
cursor: 140
codex: 136
github-copilot: 125
OpenAlex is a comprehensive open catalog of 240M+ scholarly works, authors, institutions, topics, sources, publishers, and funders. This skill provides tools and workflows for querying the OpenAlex API to search literature, analyze research output, track citations, and conduct bibliometric studies.
Always initialize the client with an email address to access the polite pool (10x rate limit boost):
from scripts.openalex_client import OpenAlexClient
client = OpenAlexClient(email="your-email@example.edu")
Install required package using uv:
uv pip install requests
No API key required - OpenAlex is completely open.
Use for: Finding papers by title, abstract, or topic
# Simple search
results = client.search_works(
search="machine learning",
per_page=100
)
# Search with filters
results = client.search_works(
search="CRISPR gene editing",
filter_params={
"publication_year": ">2020",
"is_oa": "true"
},
sort="cited_by_count:desc"
)
Use for: Getting all publications by a specific researcher
Use the two-step pattern (entity name → ID → works):
from scripts.query_helpers import find_author_works
works = find_author_works(
author_name="Jennifer Doudna",
client=client,
limit=100
)
Manual two-step approach:
# Step 1: Get author ID
author_response = client._make_request(
'/authors',
params={'search': 'Jennifer Doudna', 'per-page': 1}
)
author_id = author_response['results'][0]['id'].split('/')[-1]
# Step 2: Get works
works = client.search_works(
filter_params={"authorships.author.id": author_id}
)
Use for: Analyzing research output from universities or organizations
from scripts.query_helpers import find_institution_works
works = find_institution_works(
institution_name="Stanford University",
client=client,
limit=200
)
Use for: Finding influential papers in a field
from scripts.query_helpers import find_highly_cited_recent_papers
papers = find_highly_cited_recent_papers(
topic="quantum computing",
years=">2020",
client=client,
limit=100
)
Use for: Finding freely available research
from scripts.query_helpers import get_open_access_papers
papers = get_open_access_papers(
search_term="climate change",
client=client,
oa_status="any", # or "gold", "green", "hybrid", "bronze"
limit=200
)
Use for: Tracking research output over time
from scripts.query_helpers import get_publication_trends
trends = get_publication_trends(
search_term="artificial intelligence",
filter_params={"is_oa": "true"},
client=client
)
# Sort and display
for trend in sorted(trends, key=lambda x: x['key'])[-10:]:
print(f"{trend['key']}: {trend['count']} publications")
Use for: Comprehensive analysis of author or institution research
from scripts.query_helpers import analyze_research_output
analysis = analyze_research_output(
entity_type='institution', # or 'author'
entity_name='MIT',
client=client,
years='>2020'
)
print(f"Total works: {analysis['total_works']}")
print(f"Open access: {analysis['open_access_percentage']}%")
print(f"Top topics: {analysis['top_topics'][:5]}")
Use for: Getting information for multiple DOIs, ORCIDs, or IDs efficiently
dois = [
"https://doi.org/10.1038/s41586-021-03819-2",
"https://doi.org/10.1126/science.abc1234",
# ... up to 50 DOIs
]
works = client.batch_lookup(
entity_type='works',
ids=dois,
id_field='doi'
)
Use for: Getting representative samples for analysis
# Small sample
works = client.sample_works(
sample_size=100,
seed=42, # For reproducibility
filter_params={"publication_year": "2023"}
)
# Large sample (>10k) - automatically handles multiple requests
works = client.sample_works(
sample_size=25000,
seed=42,
filter_params={"is_oa": "true"}
)
Use for: Finding papers that cite a specific work
# Get the work
work = client.get_entity('works', 'https://doi.org/10.1038/s41586-021-03819-2')
# Get citing papers using cited_by_api_url
import requests
citing_response = requests.get(
work['cited_by_api_url'],
params={'mailto': client.email, 'per-page': 200}
)
citing_works = citing_response.json()['results']
Use for: Understanding research focus areas
# Get top topics for an institution
topics = client.group_by(
entity_type='works',
group_field='topics.id',
filter_params={
"authorships.institutions.id": "I136199984", # MIT
"publication_year": ">2020"
}
)
for topic in topics[:10]:
print(f"{topic['key_display_name']}: {topic['count']} works")
Use for: Downloading large datasets for analysis
# Paginate through all results
all_papers = client.paginate_all(
endpoint='/works',
params={
'search': 'synthetic biology',
'filter': 'publication_year:2020-2024'
},
max_results=10000
)
# Export to CSV
import csv
with open('papers.csv', 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(['Title', 'Year', 'Citations', 'DOI', 'OA Status'])
for paper in all_papers:
writer.writerow([
paper.get('title', 'N/A'),
paper.get('publication_year', 'N/A'),
paper.get('cited_by_count', 0),
paper.get('doi', 'N/A'),
paper.get('open_access', {}).get('oa_status', 'closed')
])
Add email to get 10x rate limit (1 req/sec → 10 req/sec):
client = OpenAlexClient(email="your-email@example.edu")
Never filter by entity names directly - always get ID first:
# ✅ Correct
# 1. Search for entity → get ID
# 2. Filter by ID
# ❌ Wrong
# filter=author_name:Einstein # This doesn't work!
Always use per-page=200 for efficient data retrieval:
results = client.search_works(search="topic", per_page=200)
Use batch_lookup() for multiple IDs instead of individual requests:
# ✅ Correct - 1 request for 50 DOIs
works = client.batch_lookup('works', doi_list, 'doi')
# ❌ Wrong - 50 separate requests
for doi in doi_list:
work = client.get_entity('works', doi)
Use sample_works() with seed for reproducible random sampling:
# ✅ Correct
works = client.sample_works(sample_size=100, seed=42)
# ❌ Wrong - random page numbers bias results
# Using random page numbers doesn't give true random sample
Reduce response size by selecting specific fields:
results = client.search_works(
search="topic",
select=['id', 'title', 'publication_year', 'cited_by_count']
)
# Single year
filter_params={"publication_year": "2023"}
# After year
filter_params={"publication_year": ">2020"}
# Range
filter_params={"publication_year": "2020-2024"}
# All conditions must match
filter_params={
"publication_year": ">2020",
"is_oa": "true",
"cited_by_count": ">100"
}
# Any institution matches
filter_params={
"authorships.institutions.id": "I136199984|I27837315" # MIT or Harvard
}
# Papers with authors from BOTH institutions
filter_params={
"authorships.institutions.id": "I136199984+I27837315" # MIT AND Harvard
}
# Exclude type
filter_params={
"type": "!paratext"
}
OpenAlex provides these entity types:
Access any entity type using consistent patterns:
client.search_works(...)
client.get_entity('authors', author_id)
client.group_by('works', 'topics.id', filter_params={...})
Use external identifiers directly:
# DOI for works
work = client.get_entity('works', 'https://doi.org/10.7717/peerj.4375')
# ORCID for authors
author = client.get_entity('authors', 'https://orcid.org/0000-0003-1613-5981')
# ROR for institutions
institution = client.get_entity('institutions', 'https://ror.org/02y3ad647')
# ISSN for sources
source = client.get_entity('sources', 'issn:0028-0836')
See references/api_guide.md for:
See references/common_queries.md for:
Main API client with:
Use for direct API access with full control.
High-level helper functions for common operations:
find_author_works() - Get papers by author
find_institution_works() - Get papers from institution
find_highly_cited_recent_papers() - Get influential papers
get_open_access_papers() - Find OA publications
get_publication_trends() - Analyze trends over time
analyze_research_output() - Comprehensive analysis

Use for common research queries with simplified interfaces.
If encountering 403 errors:
If searches return no results:
(see references/api_guide.md)

For large queries:
- Paginate with per-page=200
- Use select= to limit returned fields

Always use the polite pool for production workflows by providing an email to the client.
Weekly Installs
190
Repository
GitHub Stars
23.4K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
opencode: 156
claude-code: 151
gemini-cli: 146
cursor: 140
codex: 136
github-copilot: 125
DOCX文件创建、编辑与分析完整指南 - 使用docx-js、Pandoc和Python脚本
48,500 周安装
Ashby Automation - 在 Claude Code 中自动化 Ashby ATS 招聘流程
Codervisor Forge:Rust+Node.js混合项目开发工具包,一站式脚手架与发布方案
Rust+Node.js混合项目脚手架:一键搭建规范开发、CI/CD、发布与版本控制
Scaleway 一键部署技能 - 无需 DevOps 知识,3分钟自动配置 VPS 服务器
NotebookLM Research 技能:AI 研究助手,多源信息综合与音频摘要生成
G-Pilot工具架构指南:Action-Ledger-Result模式与Google AI工具开发