tooluniverse-sdk by mims-harvard/tooluniverse
npx skills add https://github.com/mims-harvard/tooluniverse --skill tooluniverse-sdk

ToolUniverse 通过统一的接口提供对 1,000+ 科学工具的程序化访问。它实现了 AI-工具交互协议,用于构建集成机器学习模型、数据库、API 和科学软件包的 AI 科学家系统。
重要提示 - 语言处理:大多数工具仅接受英文术语。在构建工作流时,请始终在将非英语输入传递给工具参数之前将其翻译成英语。只有在英语查询没有返回结果时,才尝试使用原始语言术语作为备选方案。
# 标准安装
pip install tooluniverse
# 包含可选功能
pip install tooluniverse[embedding] # 嵌入搜索 (GPU)
pip install tooluniverse[ml] # 机器学习模型工具
pip install tooluniverse[all] # 所有功能
# 基于 LLM 的工具搜索和钩子功能所需
export OPENAI_API_KEY="sk-..."
# 可选,用于更高的速率限制
export NCBI_API_KEY="..."
或使用 .env 文件:
from dotenv import load_dotenv
load_dotenv()
from tooluniverse import ToolUniverse
# 1. 初始化并加载工具
tu = ToolUniverse()
tu.load_tools() # 加载 1,000+ 个工具 (首次加载约 5-10 秒)
# 2. 查找工具 (三种方法)
# 方法 A: 关键词 (快速,无需 API 密钥)
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "protein structure", "limit": 10}
})
# 方法 B: LLM (智能,需要 OPENAI_API_KEY)
tools = tu.run({
"name": "Tool_Finder_LLM",
"arguments": {"description": "predict drug toxicity", "limit": 5}
})
# 方法 C: 嵌入 (语义,需要 GPU)
tools = tu.run({
"name": "Tool_Finder",
"arguments": {"description": "protein interactions", "limit": 10}
})
# 3. 执行工具 (两种方式)
# 字典 API
result = tu.run({
"name": "UniProt_get_entry_by_accession",
"arguments": {"accession": "P05067"}
})
# 函数 API (推荐)
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
# 查找工具
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "ADMET prediction", "limit": 3}
})
# 检查结果结构
if isinstance(tools, dict) and 'tools' in tools:
for tool in tools['tools']:
print(f"{tool['name']}: {tool['description']}")
# 执行工具
result = tu.tools.ADMETAI_predict_admet(
smiles="CC(C)Cc1ccc(cc1)C(C)C(O)=O"
)
# 定义调用
calls = [
{"name": "UniProt_get_entry_by_accession", "arguments": {"accession": "P05067"}},
{"name": "UniProt_get_entry_by_accession", "arguments": {"accession": "P12345"}},
{"name": "RCSB_PDB_get_structure_by_id", "arguments": {"pdb_id": "1ABC"}}
]
# 并行执行
results = tu.run_batch(calls)
def drug_discovery_pipeline(disease_id):
    """Disease -> targets -> compounds -> ADMET predictions, end to end.

    Opens its own cached ToolUniverse session and always closes it, even
    when a tool call raises.

    Args:
        disease_id: EFO disease identifier passed to OpenTargets
            (e.g. "EFO_0000270"). Presumably must be English/EFO-coded —
            TODO confirm against the OpenTargets tool schema.

    Returns:
        dict with keys "targets" (raw OpenTargets response),
        "compounds" (list of batched ChEMBL responses) and
        "admet" (flat list of ADMET prediction results).
    """
    tu = ToolUniverse(use_cache=True)
    tu.load_tools()
    try:
        # Get targets associated with the disease
        targets = tu.tools.OpenTargets_get_associated_targets_by_disease_efoId(
            efoId=disease_id
        )
        # Get compounds (batched) — only the first 5 targets are queried
        compound_calls = [
            {"name": "ChEMBL_search_molecule_by_target",
             "arguments": {"target_id": t['id'], "limit": 10}}
            for t in targets['data'][:5]
        ]
        compounds = tu.run_batch(compound_calls)
        # Predict ADMET for up to 3 molecules per ChEMBL result
        admet_results = []
        for comp_list in compounds:
            # NOTE(review): assumes each batch entry is a dict with a
            # 'molecules' list — verify against the ChEMBL tool's output.
            if comp_list and 'molecules' in comp_list:
                for mol in comp_list['molecules'][:3]:
                    admet = tu.tools.ADMETAI_predict_admet(
                        smiles=mol['smiles'],
                        use_cache=True  # predictions are deterministic; cache them
                    )
                    admet_results.append(admet)
        return {"targets": targets, "compounds": compounds, "admet": admet_results}
    finally:
        # Release the session regardless of success or failure
        tu.close()
# 全局启用
tu = ToolUniverse(use_cache=True)
tu.load_tools()
# 或按调用启用
result = tu.tools.ADMETAI_predict_admet(
smiles="...",
use_cache=True # 缓存昂贵的预测
)
# 管理缓存
stats = tu.get_cache_stats()
tu.clear_cache()
# 为大型输出启用钩子
tu = ToolUniverse(hooks_enabled=True)
tu.load_tools()
result = tu.tools.OpenTargets_get_target_gene_ontology_by_ensemblID(
ensemblId="ENSG00000012048"
)
# 检查是否已摘要
if isinstance(result, dict) and "summary" in result:
print(f"Summarized: {result['summary']}")
# 更快地加载
tu = ToolUniverse()
tu.load_tools(categories=["proteins", "drugs"])
# ❌ 错误 - 将会失败
tu = ToolUniverse()
result = tu.tools.some_tool() # 错误!
# ✅ 正确
tu = ToolUniverse()
tu.load_tools()
result = tu.tools.some_tool()
# ❌ 错误
tools = tu.run({"name": "Tool_Finder_Keyword", "arguments": {"description": "protein"}})
for tool in tools: # 错误: tools 是字典
print(tool['name'])
# ✅ 正确
if isinstance(tools, dict) and 'tools' in tools:
for tool in tools['tools']:
print(tool['name'])
# 首先检查工具模式
tool_info = tu.all_tool_dict["UniProt_get_entry_by_accession"]
required = tool_info['parameter'].get('required', [])
print(f"Required: {required}")
# 然后调用
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
# ✅ 缓存: 机器学习预测、数据库查询 (确定性的)
result = tu.tools.ADMETAI_predict_admet(smiles="...", use_cache=True)
# ❌ 不要缓存: 实时数据、时间敏感的结果
result = tu.tools.get_latest_publications() # 不缓存
from tooluniverse.exceptions import ToolError, ToolUnavailableError
try:
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
except ToolUnavailableError as e:
print(f"Tool unavailable: {e}")
except ToolError as e:
print(f"Execution failed: {e}")
# ❌ 错误
result = tu.tools.uniprot_get_entry_by_accession(accession="P05067")
# ✅ 正确
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
result = tu.tools.tool_name(
param="value",
use_cache=True, # 缓存此调用
validate=True, # 验证参数 (默认)
stream_callback=None # 流式输出
)
# 1. 加载特定类别
tu.load_tools(categories=["proteins"])
# 2. 使用批量执行
results = tu.run_batch(calls)
# 3. 启用缓存
tu = ToolUniverse(use_cache=True)
# 4. 禁用验证 (测试后)
result = tu.tools.tool_name(param="value", validate=False)
# 搜索工具
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "partial_name", "limit": 10}
})
# 检查是否存在
if "Tool_Name" in tu.all_tool_dict:
print("Found!")
import os
if not os.environ.get("OPENAI_API_KEY"):
print("⚠️ OPENAI_API_KEY not set")
print("Set: export OPENAI_API_KEY='sk-...'")
from tooluniverse.exceptions import ToolValidationError
try:
result = tu.tools.some_tool(param="value")
except ToolValidationError as e:
# 检查模式
tool_info = tu.all_tool_dict["some_tool"]
print(f"Required: {tool_info['parameter'].get('required', [])}")
print(f"Properties: {tool_info['parameter']['properties'].keys()}")
from tooluniverse.logging_config import set_log_level
set_log_level("DEBUG")
| 类别 | 工具 | 用例 |
|---|---|---|
| 蛋白质 | UniProt, RCSB PDB, AlphaFold | 蛋白质分析、结构 |
| 药物 | DrugBank, ChEMBL, PubChem | 药物发现、化合物 |
| 基因组学 | Ensembl, NCBI Gene, gnomAD | 基因分析、变异 |
| 疾病 | OpenTargets, ClinVar | 疾病-靶点关联 |
| 文献 | PubMed, Europe PMC | 文献搜索 |
| 机器学习模型 | ADMET-AI, AlphaFold | 预测、建模 |
| 通路 | KEGG, Reactome | 通路分析 |
详细指南请参阅仓库中的 examples/ 目录以及 REFERENCE.md。
每周安装量
162
仓库
GitHub 星标数
1.2K
首次出现
2026年2月4日
安全审计
已安装于
codex154
opencode153
gemini-cli149
github-copilot146
amp141
kimi-cli140
ToolUniverse provides programmatic access to 1,000+ scientific tools through a unified interface. It implements the AI-Tool Interaction Protocol for building AI scientist systems that integrate ML models, databases, APIs, and scientific packages.
IMPORTANT — Language Handling: Most tools accept English terms only. When building workflows, always translate non-English input to English before passing it to tool parameters. Only try original-language terms as a fallback if English returns no results.
# Standard installation
pip install tooluniverse
# With optional features
pip install tooluniverse[embedding] # Embedding search (GPU)
pip install tooluniverse[ml] # ML model tools
pip install tooluniverse[all] # All features
# Required for LLM-based tool search and hooks
export OPENAI_API_KEY="sk-..."
# Optional for higher rate limits
export NCBI_API_KEY="..."
Or use .env file:
from dotenv import load_dotenv
load_dotenv()
from tooluniverse import ToolUniverse
# 1. Initialize and load tools
tu = ToolUniverse()
tu.load_tools() # Loads 1,000+ tools (~5-10 seconds first time)
# 2. Find tools (three methods)
# Method A: Keyword (fast, no API key)
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "protein structure", "limit": 10}
})
# Method B: LLM (intelligent, requires OPENAI_API_KEY)
tools = tu.run({
"name": "Tool_Finder_LLM",
"arguments": {"description": "predict drug toxicity", "limit": 5}
})
# Method C: Embedding (semantic, requires GPU)
tools = tu.run({
"name": "Tool_Finder",
"arguments": {"description": "protein interactions", "limit": 10}
})
# 3. Execute tools (two ways)
# Dictionary API
result = tu.run({
"name": "UniProt_get_entry_by_accession",
"arguments": {"accession": "P05067"}
})
# Function API (recommended)
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
# Find tools
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "ADMET prediction", "limit": 3}
})
# Check results structure
if isinstance(tools, dict) and 'tools' in tools:
for tool in tools['tools']:
print(f"{tool['name']}: {tool['description']}")
# Execute tool
result = tu.tools.ADMETAI_predict_admet(
smiles="CC(C)Cc1ccc(cc1)C(C)C(O)=O"
)
# Define calls
calls = [
{"name": "UniProt_get_entry_by_accession", "arguments": {"accession": "P05067"}},
{"name": "UniProt_get_entry_by_accession", "arguments": {"accession": "P12345"}},
{"name": "RCSB_PDB_get_structure_by_id", "arguments": {"pdb_id": "1ABC"}}
]
# Execute in parallel
results = tu.run_batch(calls)
def drug_discovery_pipeline(disease_id):
    """Run a small target -> compound -> ADMET pipeline for one disease.

    Looks up disease-associated targets via OpenTargets, fetches candidate
    molecules from ChEMBL for the top five targets in a single batch, then
    runs cached ADMET predictions on up to three molecules per target.

    Args:
        disease_id: EFO identifier of the disease (e.g. "EFO_0000270").

    Returns:
        dict with keys "targets", "compounds", and "admet".
    """
    universe = ToolUniverse(use_cache=True)
    universe.load_tools()
    try:
        # Disease -> associated targets.
        targets = universe.tools.OpenTargets_get_associated_targets_by_disease_efoId(
            efoId=disease_id
        )
        # Build one batched ChEMBL query per target (top 5 targets only).
        batch = []
        for target in targets['data'][:5]:
            batch.append({
                "name": "ChEMBL_search_molecule_by_target",
                "arguments": {"target_id": target['id'], "limit": 10},
            })
        compounds = universe.run_batch(batch)
        # ADMET prediction for up to 3 molecules from each result set.
        predictions = []
        for hits in compounds:
            if not hits or 'molecules' not in hits:
                continue  # skip empty/failed batch entries
            for molecule in hits['molecules'][:3]:
                predictions.append(
                    universe.tools.ADMETAI_predict_admet(
                        smiles=molecule['smiles'],
                        use_cache=True,
                    )
                )
        return {"targets": targets, "compounds": compounds, "admet": predictions}
    finally:
        # Always release the session, even if a tool call raised.
        universe.close()
# Enable globally
tu = ToolUniverse(use_cache=True)
tu.load_tools()
# Or per-call
result = tu.tools.ADMETAI_predict_admet(
smiles="...",
use_cache=True # Cache expensive predictions
)
# Manage cache
stats = tu.get_cache_stats()
tu.clear_cache()
# Enable hooks for large outputs
tu = ToolUniverse(hooks_enabled=True)
tu.load_tools()
result = tu.tools.OpenTargets_get_target_gene_ontology_by_ensemblID(
ensemblId="ENSG00000012048"
)
# Check if summarized
if isinstance(result, dict) and "summary" in result:
print(f"Summarized: {result['summary']}")
# Faster loading
tu = ToolUniverse()
tu.load_tools(categories=["proteins", "drugs"])
# ❌ Wrong - will fail
tu = ToolUniverse()
result = tu.tools.some_tool() # Error!
# ✅ Correct
tu = ToolUniverse()
tu.load_tools()
result = tu.tools.some_tool()
# ❌ Wrong
tools = tu.run({"name": "Tool_Finder_Keyword", "arguments": {"description": "protein"}})
for tool in tools: # Error: tools is dict
print(tool['name'])
# ✅ Correct
if isinstance(tools, dict) and 'tools' in tools:
for tool in tools['tools']:
print(tool['name'])
# Check tool schema first
tool_info = tu.all_tool_dict["UniProt_get_entry_by_accession"]
required = tool_info['parameter'].get('required', [])
print(f"Required: {required}")
# Then call
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
# ✅ Cache: ML predictions, database queries (deterministic)
result = tu.tools.ADMETAI_predict_admet(smiles="...", use_cache=True)
# ❌ Don't cache: real-time data, time-sensitive results
result = tu.tools.get_latest_publications() # No cache
from tooluniverse.exceptions import ToolError, ToolUnavailableError
try:
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
except ToolUnavailableError as e:
print(f"Tool unavailable: {e}")
except ToolError as e:
print(f"Execution failed: {e}")
# ❌ Wrong
result = tu.tools.uniprot_get_entry_by_accession(accession="P05067")
# ✅ Correct
result = tu.tools.UniProt_get_entry_by_accession(accession="P05067")
result = tu.tools.tool_name(
param="value",
use_cache=True, # Cache this call
validate=True, # Validate parameters (default)
stream_callback=None # Streaming output
)
# 1. Load specific categories
tu.load_tools(categories=["proteins"])
# 2. Use batch execution
results = tu.run_batch(calls)
# 3. Enable caching
tu = ToolUniverse(use_cache=True)
# 4. Disable validation (after testing)
result = tu.tools.tool_name(param="value", validate=False)
# Search for tool
tools = tu.run({
"name": "Tool_Finder_Keyword",
"arguments": {"description": "partial_name", "limit": 10}
})
# Check if exists
if "Tool_Name" in tu.all_tool_dict:
print("Found!")
import os
if not os.environ.get("OPENAI_API_KEY"):
print("⚠️ OPENAI_API_KEY not set")
print("Set: export OPENAI_API_KEY='sk-...'")
from tooluniverse.exceptions import ToolValidationError
try:
result = tu.tools.some_tool(param="value")
except ToolValidationError as e:
# Check schema
tool_info = tu.all_tool_dict["some_tool"]
print(f"Required: {tool_info['parameter'].get('required', [])}")
print(f"Properties: {tool_info['parameter']['properties'].keys()}")
from tooluniverse.logging_config import set_log_level
set_log_level("DEBUG")
| Category | Tools | Use Cases |
|---|---|---|
| Proteins | UniProt, RCSB PDB, AlphaFold | Protein analysis, structure |
| Drugs | DrugBank, ChEMBL, PubChem | Drug discovery, compounds |
| Genomics | Ensembl, NCBI Gene, gnomAD | Gene analysis, variants |
| Diseases | OpenTargets, ClinVar | Disease-target associations |
| Literature | PubMed, Europe PMC | Literature search |
| ML Models | ADMET-AI, AlphaFold | Predictions, modeling |
| Pathways | KEGG, Reactome | Pathway analysis |
See the examples/ directory in the repository. For detailed guides, see REFERENCE.md.
Weekly Installs
162
Repository
GitHub Stars
1.2K
First Seen
Feb 4, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Warn
Installed on
codex154
opencode153
gemini-cli149
github-copilot146
amp141
kimi-cli140
PPTX 文件处理全攻略:Python 脚本创建、编辑、分析 .pptx 文件内容与结构
915 周安装
SQL查询优化指南:PostgreSQL、Snowflake、BigQuery高性能SQL编写技巧与方言参考
1,100 周安装
统计分析技能指南:描述性统计、趋势分析与异常值检测方法
1,000 周安装
供应链风险审计员 - 开源项目依赖项安全审计工具 | 识别高风险依赖项
1,100 周安装
企业法律风险评估框架:基于严重性与可能性的风险矩阵与分类指南
1,100 周安装
Valyu API 最佳实践指南:搜索、内容提取、AI 答案与深度研究
1,300 周安装
ralphinho-rfc-pipeline:AI驱动的复杂功能分解与多单元编排工作流工具
1,300 周安装