dspy by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill dspy在以下情况时使用 DSPy:
GitHub 星标数 : 22,000+ | 创建者 : Stanford NLP
# 稳定版本
pip install dspy
# 最新开发版本
pip install git+https://github.com/stanfordnlp/dspy.git
# 安装特定 LM 提供商支持
pip install dspy[openai] # OpenAI
pip install dspy[anthropic] # Anthropic Claude
pip install dspy[all] # 所有提供商
import dspy
# 配置你的语言模型
lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)
# 定义一个签名(输入 → 输出)
class QA(dspy.Signature):
"""用简短的事实性答案回答问题。"""
question = dspy.InputField()
answer = dspy.OutputField(desc="通常为 1 到 5 个单词")
# 创建一个模块
qa = dspy.Predict(QA)
# 使用它
response = qa(question="What is the capital of France?")
print(response.answer) # "Paris"
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
import dspy
lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)
# 使用 ChainOfThought 进行更好的推理
class MathProblem(dspy.Signature):
"""解决数学应用题。"""
problem = dspy.InputField()
answer = dspy.OutputField(desc="数值答案")
# ChainOfThought 自动生成推理步骤
cot = dspy.ChainOfThought(MathProblem)
response = cot(problem="If John has 5 apples and gives 2 to Mary, how many does he have?")
print(response.rationale) # 显示推理步骤
print(response.answer) # "3"
签名定义了你的 AI 任务的结构(输入 → 输出):
# 内联签名(简单)
qa = dspy.Predict("question -> answer")
# 类签名(详细)
class Summarize(dspy.Signature):
"""将文本总结为要点。"""
text = dspy.InputField()
summary = dspy.OutputField(desc="要点,3-5 项")
summarizer = dspy.ChainOfThought(Summarize)
何时使用每种方式:
模块是可重用的组件,将输入转换为输出:
基础预测模块:
predictor = dspy.Predict("context, question -> answer")
result = predictor(context="Paris is the capital of France",
question="What is the capital?")
在回答前生成推理步骤:
cot = dspy.ChainOfThought("question -> answer")
result = cot(question="Why is the sky blue?")
print(result.rationale) # 推理步骤
print(result.answer) # 最终答案
使用工具的类智能体推理:
from dspy.predict import ReAct
class SearchQA(dspy.Signature):
"""使用搜索回答问题。"""
question = dspy.InputField()
answer = dspy.OutputField()
def search_tool(query: str) -> str:
"""搜索 Wikipedia。"""
# 你的搜索实现
return results
react = ReAct(SearchQA, tools=[search_tool])
result = react(question="When was Python created?")
生成并执行代码进行推理:
pot = dspy.ProgramOfThought("question -> answer")
result = pot(question="What is 15% of 240?")
# 生成: answer = 240 * 0.15
优化器使用训练数据自动改进你的模块:
从示例中学习:
from dspy.teleprompt import BootstrapFewShot
# 训练数据
trainset = [
dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
dspy.Example(question="What is 3+5?", answer="8").with_inputs("question"),
]
# 定义评估指标
def validate_answer(example, pred, trace=None):
return example.answer == pred.answer
# 优化
optimizer = BootstrapFewShot(metric=validate_answer, max_bootstrapped_demos=3)
optimized_qa = optimizer.compile(qa, trainset=trainset)
# 现在 optimized_qa 表现更好了!
迭代改进提示:
from dspy.teleprompt import MIPRO
optimizer = MIPRO(
metric=validate_answer,
num_candidates=10,
init_temperature=1.0
)
optimized_cot = optimizer.compile(
cot,
trainset=trainset,
num_trials=100
)
为模型微调创建数据集:
from dspy.teleprompt import BootstrapFinetune
optimizer = BootstrapFinetune(metric=validate_answer)
optimized_module = optimizer.compile(qa, trainset=trainset)
# 导出用于微调的训练数据
import dspy
class MultiHopQA(dspy.Module):
def __init__(self):
super().__init__()
self.retrieve = dspy.Retrieve(k=3)
self.generate_query = dspy.ChainOfThought("question -> search_query")
self.generate_answer = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
# 阶段 1:生成搜索查询
search_query = self.generate_query(question=question).search_query
# 阶段 2:检索上下文
passages = self.retrieve(search_query).passages
context = "\n".join(passages)
# 阶段 3:生成答案
answer = self.generate_answer(context=context, question=question).answer
return dspy.Prediction(answer=answer, context=context)
# 使用该流水线
qa_system = MultiHopQA()
result = qa_system(question="Who wrote the book that inspired the movie Blade Runner?")
import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM
# 配置检索器
retriever = ChromadbRM(
collection_name="documents",
persist_directory="./chroma_db"
)
class RAG(dspy.Module):
def __init__(self, num_passages=3):
super().__init__()
self.retrieve = dspy.Retrieve(k=num_passages)
self.generate = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
context = self.retrieve(question).passages
return self.generate(context=context, question=question)
# 创建并优化
rag = RAG()
# 使用训练数据进行优化
from dspy.teleprompt import BootstrapFewShot
optimizer = BootstrapFewShot(metric=validate_answer)
optimized_rag = optimizer.compile(rag, trainset=trainset)
import dspy
lm = dspy.Claude(
model="claude-sonnet-4-5-20250929",
api_key="your-api-key", # 或设置 ANTHROPIC_API_KEY 环境变量
max_tokens=1000,
temperature=0.7
)
dspy.settings.configure(lm=lm)
lm = dspy.OpenAI(
model="gpt-4",
api_key="your-api-key",
max_tokens=1000
)
dspy.settings.configure(lm=lm)
lm = dspy.OllamaLocal(
model="llama3.1",
base_url="http://localhost:11434"
)
dspy.settings.configure(lm=lm)
# 为不同任务使用不同模型
cheap_lm = dspy.OpenAI(model="gpt-3.5-turbo")
strong_lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
# 使用廉价模型进行检索,强大模型进行推理
with dspy.settings.context(lm=cheap_lm):
context = retriever(question)
with dspy.settings.context(lm=strong_lm):
answer = generator(context=context, question=question)
from pydantic import BaseModel, Field
class PersonInfo(BaseModel):
name: str = Field(description="Full name")
age: int = Field(description="Age in years")
occupation: str = Field(description="Current job")
class ExtractPerson(dspy.Signature):
"""从文本中提取人物信息。"""
text = dspy.InputField()
person: PersonInfo = dspy.OutputField()
extractor = dspy.TypedPredictor(ExtractPerson)
result = extractor(text="John Doe is a 35-year-old software engineer.")
print(result.person.name) # "John Doe"
print(result.person.age) # 35
import dspy
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
class MathQA(dspy.Module):
def __init__(self):
super().__init__()
self.solve = dspy.ChainOfThought("problem -> solution: float")
def forward(self, problem):
solution = self.solve(problem=problem).solution
# 断言解决方案是数值
dspy.Assert(
isinstance(float(solution), float),
"Solution must be a number",
backtrack=backtrack_handler
)
return dspy.Prediction(solution=solution)
import dspy
from collections import Counter
class ConsistentQA(dspy.Module):
def __init__(self, num_samples=5):
super().__init__()
self.qa = dspy.ChainOfThought("question -> answer")
self.num_samples = num_samples
def forward(self, question):
# 生成多个答案
answers = []
for _ in range(self.num_samples):
result = self.qa(question=question)
answers.append(result.answer)
# 返回最常见的答案
most_common = Counter(answers).most_common(1)[0][0]
return dspy.Prediction(answer=most_common)
class RerankedRAG(dspy.Module):
def __init__(self):
super().__init__()
self.retrieve = dspy.Retrieve(k=10)
self.rerank = dspy.Predict("question, passage -> relevance_score: float")
self.answer = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
# 检索候选
passages = self.retrieve(question).passages
# 重排段落
scored = []
for passage in passages:
score = float(self.rerank(question=question, passage=passage).relevance_score)
scored.append((score, passage))
# 取前 3 个
top_passages = [p for _, p in sorted(scored, reverse=True)[:3]]
context = "\n\n".join(top_passages)
# 生成答案
return self.answer(context=context, question=question)
def exact_match(example, pred, trace=None):
"""精确匹配指标。"""
return example.answer.lower() == pred.answer.lower()
def f1_score(example, pred, trace=None):
"""文本重叠的 F1 分数。"""
pred_tokens = set(pred.answer.lower().split())
gold_tokens = set(example.answer.lower().split())
if not pred_tokens:
return 0.0
precision = len(pred_tokens & gold_tokens) / len(pred_tokens)
recall = len(pred_tokens & gold_tokens) / len(gold_tokens)
if precision + recall == 0:
return 0.0
return 2 * (precision * recall) / (precision + recall)
from dspy.evaluate import Evaluate
# 创建评估器
evaluator = Evaluate(
devset=testset,
metric=exact_match,
num_threads=4,
display_progress=True
)
# 评估模型
score = evaluator(qa_system)
print(f"Accuracy: {score}")
# 比较优化前后
score_before = evaluator(qa)
score_after = evaluator(optimized_qa)
print(f"Improvement: {score_after - score_before:.2%}")
# 从 Predict 开始
qa = dspy.Predict("question -> answer")
# 如果需要,添加推理
qa = dspy.ChainOfThought("question -> answer")
# 当你有数据时,添加优化
optimized_qa = optimizer.compile(qa, trainset=data)
# ❌ 差:模糊
class Task(dspy.Signature):
input = dspy.InputField()
output = dspy.OutputField()
# ✅ 好:描述性
class SummarizeArticle(dspy.Signature):
"""将新闻文章总结为 3-5 个要点。"""
article = dspy.InputField(desc="完整的文章文本")
summary = dspy.OutputField(desc="要点,3-5 项")
# 创建多样化的训练示例
trainset = [
dspy.Example(question="factual", answer="...).with_inputs("question"),
dspy.Example(question="reasoning", answer="...").with_inputs("question"),
dspy.Example(question="calculation", answer="...").with_inputs("question"),
]
# 使用验证集进行评估
def metric(example, pred, trace=None):
return example.answer in pred.answer
# 保存
optimized_qa.save("models/qa_v1.json")
# 加载
loaded_qa = dspy.ChainOfThought("question -> answer")
loaded_qa.load("models/qa_v1.json")
# 启用追踪
dspy.settings.configure(lm=lm, trace=[])
# 运行预测
result = qa(question="...")
# 检查追踪记录
for call in dspy.settings.trace:
print(f"Prompt: {call['prompt']}")
print(f"Response: {call['response']}")
| 功能 | 手动提示 | LangChain | DSPy |
|---|---|---|---|
| 提示工程 | 手动 | 手动 | 自动 |
| 优化 | 试错 | 无 | 数据驱动 |
| 模块化 | 低 | 中等 | 高 |
| 类型安全 | 否 | 有限 | 是(签名) |
| 可移植性 | 低 | 中等 | 高 |
| 学习曲线 | 低 | 中等 | 中高 |
何时选择 DSPy:
何时选择替代方案:
references/modules.md - 详细模块指南(Predict, ChainOfThought, ReAct, ProgramOfThought)references/optimizers.md - 优化算法(BootstrapFewShot, MIPRO, BootstrapFinetune)references/examples.md - 真实世界示例(RAG, 智能体, 分类器)每周安装量
183
代码仓库
GitHub 星标数
22.6K
首次出现
Jan 21, 2026
安全审计
安装于
opencode150
claude-code143
gemini-cli140
codex128
cursor128
github-copilot122
Use DSPy when you need to:
GitHub Stars : 22,000+ | Created By : Stanford NLP
# Stable release
pip install dspy
# Latest development version
pip install git+https://github.com/stanfordnlp/dspy.git
# With specific LM providers
pip install dspy[openai] # OpenAI
pip install dspy[anthropic] # Anthropic Claude
pip install dspy[all] # All providers
import dspy
# Configure your language model
lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)
# Define a signature (input → output)
class QA(dspy.Signature):
"""Answer questions with short factual answers."""
question = dspy.InputField()
answer = dspy.OutputField(desc="often between 1 and 5 words")
# Create a module
qa = dspy.Predict(QA)
# Use it
response = qa(question="What is the capital of France?")
print(response.answer) # "Paris"
import dspy
lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
dspy.settings.configure(lm=lm)
# Use ChainOfThought for better reasoning
class MathProblem(dspy.Signature):
"""Solve math word problems."""
problem = dspy.InputField()
answer = dspy.OutputField(desc="numerical answer")
# ChainOfThought generates reasoning steps automatically
cot = dspy.ChainOfThought(MathProblem)
response = cot(problem="If John has 5 apples and gives 2 to Mary, how many does he have?")
print(response.rationale) # Shows reasoning steps
print(response.answer) # "3"
Signatures define the structure of your AI task (inputs → outputs):
# Inline signature (simple)
qa = dspy.Predict("question -> answer")
# Class signature (detailed)
class Summarize(dspy.Signature):
"""Summarize text into key points."""
text = dspy.InputField()
summary = dspy.OutputField(desc="bullet points, 3-5 items")
summarizer = dspy.ChainOfThought(Summarize)
When to use each:
Modules are reusable components that transform inputs to outputs:
Basic prediction module:
predictor = dspy.Predict("context, question -> answer")
result = predictor(context="Paris is the capital of France",
question="What is the capital?")
Generates reasoning steps before answering:
cot = dspy.ChainOfThought("question -> answer")
result = cot(question="Why is the sky blue?")
print(result.rationale) # Reasoning steps
print(result.answer) # Final answer
Agent-like reasoning with tools:
from dspy.predict import ReAct
class SearchQA(dspy.Signature):
"""Answer questions using search."""
question = dspy.InputField()
answer = dspy.OutputField()
def search_tool(query: str) -> str:
"""Search Wikipedia."""
# Your search implementation
return results
react = ReAct(SearchQA, tools=[search_tool])
result = react(question="When was Python created?")
Generates and executes code for reasoning:
pot = dspy.ProgramOfThought("question -> answer")
result = pot(question="What is 15% of 240?")
# Generates: answer = 240 * 0.15
Optimizers improve your modules automatically using training data:
Learns from examples:
from dspy.teleprompt import BootstrapFewShot
# Training data
trainset = [
dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
dspy.Example(question="What is 3+5?", answer="8").with_inputs("question"),
]
# Define metric
def validate_answer(example, pred, trace=None):
return example.answer == pred.answer
# Optimize
optimizer = BootstrapFewShot(metric=validate_answer, max_bootstrapped_demos=3)
optimized_qa = optimizer.compile(qa, trainset=trainset)
# Now optimized_qa performs better!
Iteratively improves prompts:
from dspy.teleprompt import MIPRO
optimizer = MIPRO(
metric=validate_answer,
num_candidates=10,
init_temperature=1.0
)
optimized_cot = optimizer.compile(
cot,
trainset=trainset,
num_trials=100
)
Creates datasets for model fine-tuning:
from dspy.teleprompt import BootstrapFinetune
optimizer = BootstrapFinetune(metric=validate_answer)
optimized_module = optimizer.compile(qa, trainset=trainset)
# Exports training data for fine-tuning
import dspy
class MultiHopQA(dspy.Module):
def __init__(self):
super().__init__()
self.retrieve = dspy.Retrieve(k=3)
self.generate_query = dspy.ChainOfThought("question -> search_query")
self.generate_answer = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
# Stage 1: Generate search query
search_query = self.generate_query(question=question).search_query
# Stage 2: Retrieve context
passages = self.retrieve(search_query).passages
context = "\n".join(passages)
# Stage 3: Generate answer
answer = self.generate_answer(context=context, question=question).answer
return dspy.Prediction(answer=answer, context=context)
# Use the pipeline
qa_system = MultiHopQA()
result = qa_system(question="Who wrote the book that inspired the movie Blade Runner?")
import dspy
from dspy.retrieve.chromadb_rm import ChromadbRM
# Configure retriever
retriever = ChromadbRM(
collection_name="documents",
persist_directory="./chroma_db"
)
class RAG(dspy.Module):
def __init__(self, num_passages=3):
super().__init__()
self.retrieve = dspy.Retrieve(k=num_passages)
self.generate = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
context = self.retrieve(question).passages
return self.generate(context=context, question=question)
# Create and optimize
rag = RAG()
# Optimize with training data
from dspy.teleprompt import BootstrapFewShot
optimizer = BootstrapFewShot(metric=validate_answer)
optimized_rag = optimizer.compile(rag, trainset=trainset)
import dspy
lm = dspy.Claude(
model="claude-sonnet-4-5-20250929",
api_key="your-api-key", # Or set ANTHROPIC_API_KEY env var
max_tokens=1000,
temperature=0.7
)
dspy.settings.configure(lm=lm)
lm = dspy.OpenAI(
model="gpt-4",
api_key="your-api-key",
max_tokens=1000
)
dspy.settings.configure(lm=lm)
lm = dspy.OllamaLocal(
model="llama3.1",
base_url="http://localhost:11434"
)
dspy.settings.configure(lm=lm)
# Different models for different tasks
cheap_lm = dspy.OpenAI(model="gpt-3.5-turbo")
strong_lm = dspy.Claude(model="claude-sonnet-4-5-20250929")
# Use cheap model for retrieval, strong model for reasoning
with dspy.settings.context(lm=cheap_lm):
context = retriever(question)
with dspy.settings.context(lm=strong_lm):
answer = generator(context=context, question=question)
from pydantic import BaseModel, Field
class PersonInfo(BaseModel):
name: str = Field(description="Full name")
age: int = Field(description="Age in years")
occupation: str = Field(description="Current job")
class ExtractPerson(dspy.Signature):
"""Extract person information from text."""
text = dspy.InputField()
person: PersonInfo = dspy.OutputField()
extractor = dspy.TypedPredictor(ExtractPerson)
result = extractor(text="John Doe is a 35-year-old software engineer.")
print(result.person.name) # "John Doe"
print(result.person.age) # 35
import dspy
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
class MathQA(dspy.Module):
def __init__(self):
super().__init__()
self.solve = dspy.ChainOfThought("problem -> solution: float")
def forward(self, problem):
solution = self.solve(problem=problem).solution
# Assert solution is numeric
dspy.Assert(
isinstance(float(solution), float),
"Solution must be a number",
backtrack=backtrack_handler
)
return dspy.Prediction(solution=solution)
import dspy
from collections import Counter
class ConsistentQA(dspy.Module):
def __init__(self, num_samples=5):
super().__init__()
self.qa = dspy.ChainOfThought("question -> answer")
self.num_samples = num_samples
def forward(self, question):
# Generate multiple answers
answers = []
for _ in range(self.num_samples):
result = self.qa(question=question)
answers.append(result.answer)
# Return most common answer
most_common = Counter(answers).most_common(1)[0][0]
return dspy.Prediction(answer=most_common)
class RerankedRAG(dspy.Module):
def __init__(self):
super().__init__()
self.retrieve = dspy.Retrieve(k=10)
self.rerank = dspy.Predict("question, passage -> relevance_score: float")
self.answer = dspy.ChainOfThought("context, question -> answer")
def forward(self, question):
# Retrieve candidates
passages = self.retrieve(question).passages
# Rerank passages
scored = []
for passage in passages:
score = float(self.rerank(question=question, passage=passage).relevance_score)
scored.append((score, passage))
# Take top 3
top_passages = [p for _, p in sorted(scored, reverse=True)[:3]]
context = "\n\n".join(top_passages)
# Generate answer
return self.answer(context=context, question=question)
def exact_match(example, pred, trace=None):
"""Exact match metric."""
return example.answer.lower() == pred.answer.lower()
def f1_score(example, pred, trace=None):
"""F1 score for text overlap."""
pred_tokens = set(pred.answer.lower().split())
gold_tokens = set(example.answer.lower().split())
if not pred_tokens:
return 0.0
precision = len(pred_tokens & gold_tokens) / len(pred_tokens)
recall = len(pred_tokens & gold_tokens) / len(gold_tokens)
if precision + recall == 0:
return 0.0
return 2 * (precision * recall) / (precision + recall)
from dspy.evaluate import Evaluate
# Create evaluator
evaluator = Evaluate(
devset=testset,
metric=exact_match,
num_threads=4,
display_progress=True
)
# Evaluate model
score = evaluator(qa_system)
print(f"Accuracy: {score}")
# Compare optimized vs unoptimized
score_before = evaluator(qa)
score_after = evaluator(optimized_qa)
print(f"Improvement: {score_after - score_before:.2%}")
# Start with Predict
qa = dspy.Predict("question -> answer")
# Add reasoning if needed
qa = dspy.ChainOfThought("question -> answer")
# Add optimization when you have data
optimized_qa = optimizer.compile(qa, trainset=data)
# ❌ Bad: Vague
class Task(dspy.Signature):
input = dspy.InputField()
output = dspy.OutputField()
# ✅ Good: Descriptive
class SummarizeArticle(dspy.Signature):
"""Summarize news articles into 3-5 key points."""
article = dspy.InputField(desc="full article text")
summary = dspy.OutputField(desc="bullet points, 3-5 items")
# Create diverse training examples
trainset = [
dspy.Example(question="factual", answer="...).with_inputs("question"),
dspy.Example(question="reasoning", answer="...").with_inputs("question"),
dspy.Example(question="calculation", answer="...").with_inputs("question"),
]
# Use validation set for metric
def metric(example, pred, trace=None):
return example.answer in pred.answer
# Save
optimized_qa.save("models/qa_v1.json")
# Load
loaded_qa = dspy.ChainOfThought("question -> answer")
loaded_qa.load("models/qa_v1.json")
# Enable tracing
dspy.settings.configure(lm=lm, trace=[])
# Run prediction
result = qa(question="...")
# Inspect trace
for call in dspy.settings.trace:
print(f"Prompt: {call['prompt']}")
print(f"Response: {call['response']}")
| Feature | Manual Prompting | LangChain | DSPy |
|---|---|---|---|
| Prompt Engineering | Manual | Manual | Automatic |
| Optimization | Trial & error | None | Data-driven |
| Modularity | Low | Medium | High |
| Type Safety | No | Limited | Yes (Signatures) |
| Portability | Low | Medium | High |
| Learning Curve | Low | Medium | Medium-High |
When to choose DSPy:
When to choose alternatives:
references/modules.md - Detailed module guide (Predict, ChainOfThought, ReAct, ProgramOfThought)references/optimizers.md - Optimization algorithms (BootstrapFewShot, MIPRO, BootstrapFinetune)references/examples.md - Real-world examples (RAG, agents, classifiers)Weekly Installs
183
Repository
GitHub Stars
22.6K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust HubPassSocketPassSnykWarn
Installed on
opencode150
claude-code143
gemini-cli140
codex128
cursor128
github-copilot122
超能力技能使用指南:AI助手技能调用优先级与工作流程详解
45,100 周安装