langsmith-observability by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill langsmith-observability
Development platform for debugging, evaluating, and monitoring language models and AI applications.
Use LangSmith when:
Key features:
When to use alternatives:
pip install langsmith
# Set environment variables
export LANGSMITH_API_KEY="your-api-key"
export LANGSMITH_TRACING=true
from langsmith import traceable
from openai import OpenAI

client = OpenAI()

@traceable
def generate_response(prompt: str) -> str:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

# Automatically traced to LangSmith
result = generate_response("What is machine learning?")
from langsmith.wrappers import wrap_openai
from openai import OpenAI

# Wrap client for automatic tracing
client = wrap_openai(OpenAI())

# All calls automatically traced
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
A run is a single execution unit (LLM call, chain, tool). Runs form hierarchical traces showing the full execution flow.
from langsmith import traceable

# Assumes vector_store and llm are initialized elsewhere in your app.
@traceable(run_type="chain")
def process_query(query: str) -> str:
    # Parent run
    context = retrieve_context(query)           # Child run
    response = generate_answer(query, context)  # Child run
    return response

@traceable(run_type="retriever")
def retrieve_context(query: str) -> list:
    return vector_store.search(query)

@traceable(run_type="llm")
def generate_answer(query: str, context: list) -> str:
    return llm.invoke(f"Context: {context}\n\nQuestion: {query}")
Projects organize related runs. Set via environment or code:
import os

os.environ["LANGSMITH_PROJECT"] = "my-project"

# Or per-function
@traceable(project_name="my-project")
def my_function():
    pass
from langsmith import Client

client = Client()

# List runs
runs = list(client.list_runs(
    project_name="my-project",
    filter='eq(status, "success")',
    limit=100
))

# Get run details
run = client.read_run(run_id="...")

# Create feedback
client.create_feedback(
    run_id="...",
    key="correctness",
    score=0.9,
    comment="Good answer"
)
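The Run objects returned by list_runs carry timing fields, so quick aggregate statistics can be computed client-side. A minimal sketch over the runs fetched above, assuming start_time and end_time are populated datetime fields:

# Average latency across the fetched runs (skip any still in flight).
latencies = [
    (run.end_time - run.start_time).total_seconds()
    for run in runs
    if run.start_time and run.end_time
]
if latencies:
    print(f"{len(latencies)} runs, avg latency {sum(latencies) / len(latencies):.2f}s")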
from langsmith import Client

client = Client()

# Create dataset
dataset = client.create_dataset("qa-test-set", description="QA evaluation")

# Add examples
client.create_examples(
    inputs=[
        {"question": "What is Python?"},
        {"question": "What is ML?"}
    ],
    outputs=[
        {"answer": "A programming language"},
        {"answer": "Machine learning"}
    ],
    dataset_id=dataset.id
)
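Datasets can also be seeded from real traffic: list interesting runs and copy their payloads into examples. A hedged sketch reusing the list_runs and create_examples calls shown above, assuming the inputs/outputs recorded on each run match your dataset schema:

# Turn recent successful runs into dataset examples (sketch).
runs = list(client.list_runs(
    project_name="my-project",
    filter='eq(status, "success")',
    limit=20
))

client.create_examples(
    inputs=[run.inputs for run in runs],
    outputs=[run.outputs for run in runs],
    dataset_id=dataset.id
)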
from langsmith import evaluate

def my_model(inputs: dict) -> dict:
    # Your model logic
    return {"answer": generate_answer(inputs["question"])}

def correctness_evaluator(run, example):
    prediction = run.outputs["answer"]
    reference = example.outputs["answer"]
    score = 1.0 if reference.lower() in prediction.lower() else 0.0
    return {"key": "correctness", "score": score}

results = evaluate(
    my_model,
    data="qa-test-set",
    evaluators=[correctness_evaluator],
    experiment_prefix="v1"
)
print(f"Average score: {results.aggregate_metrics['correctness']}")
from langsmith.evaluation import LangChainStringEvaluator

# Use LangChain evaluators
results = evaluate(
    my_model,
    data="qa-test-set",
    evaluators=[
        LangChainStringEvaluator("qa"),
        LangChainStringEvaluator("cot_qa")
    ]
)
from langsmith import tracing_context

with tracing_context(
    project_name="experiment-1",
    tags=["production", "v2"],
    metadata={"version": "2.0"}
):
    # All traceable calls inherit context
    result = my_function()
from langsmith import trace

with trace(
    name="custom_operation",
    run_type="tool",
    inputs={"query": "test"}
) as run:
    result = do_something()
    run.end(outputs={"result": result})
def sanitize_inputs(inputs: dict) -> dict:
    if "password" in inputs:
        inputs["password"] = "***"
    return inputs

@traceable(process_inputs=sanitize_inputs)
def login(username: str, password: str):
    return authenticate(username, password)
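The same scrubbing idea works on the return side: traceable also accepts a process_outputs hook in current SDK versions. A sketch, where session_token is a hypothetical field in the function's return value:

def sanitize_outputs(outputs: dict) -> dict:
    # Hypothetical field name; redact whatever your function actually returns.
    if "session_token" in outputs:
        outputs["session_token"] = "***"
    return outputs

@traceable(process_inputs=sanitize_inputs, process_outputs=sanitize_outputs)
def login(username: str, password: str):
    return authenticate(username, password)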
import os
os.environ["LANGSMITH_TRACING_SAMPLING_RATE"] = "0.1" # 10% sampling
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Tracing enabled automatically with LANGSMITH_TRACING=true
llm = ChatOpenAI(model="gpt-4o")
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("user", "{input}")
])
chain = prompt | llm

# All chain runs traced automatically
response = chain.invoke({"input": "Hello!"})
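Per-invocation annotations don't require changing the chain: LangChain runnables take a config mapping whose tags and metadata are attached to the resulting trace. The tag and metadata values here are illustrative:

# Tags and metadata apply to this invocation only.
response = chain.invoke(
    {"input": "Hello!"},
    config={"tags": ["experiment-a"], "metadata": {"user_id": "u-123"}}
)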
from langsmith import Client
client = Client()
# Pull prompt from hub
prompt = client.pull_prompt("my-org/qa-prompt")
# Use in application
result = prompt.invoke({"question": "What is AI?"})
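The hub is read/write: push_prompt publishes a new version under the same identifier. A sketch, assuming write access to the my-org workspace and a client as constructed above:

from langchain_core.prompts import ChatPromptTemplate

new_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a precise QA assistant."),
    ("user", "{question}")
])
# Pushing creates a new committed version of the prompt in the hub.
client.push_prompt("my-org/qa-prompt", object=new_prompt)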
import asyncio

from langsmith import AsyncClient

async def main():
    client = AsyncClient()
    runs = []
    async for run in client.list_runs(project_name="my-project"):
        runs.append(run)
    return runs

runs = asyncio.run(main())
from langsmith import Client

client = Client()

# Collect user feedback
def record_feedback(run_id: str, user_rating: int, comment: str | None = None):
    client.create_feedback(
        run_id=run_id,
        key="user_rating",
        score=user_rating / 5.0,  # Normalize to 0-1
        comment=comment
    )

# In your application
record_feedback(run_id="...", user_rating=4, comment="Helpful response")
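Feedback needs a run id, and the simplest way to obtain one is to capture it inside the traced function itself via get_current_run_tree, which returns the active run (or None outside a trace). A sketch reusing generate_response from the quick-start example:

from langsmith import traceable
from langsmith.run_helpers import get_current_run_tree

@traceable
def answer_question(question: str) -> dict:
    run = get_current_run_tree()  # Active run for this traced call
    answer = generate_response(question)
    return {"answer": answer, "run_id": str(run.id) if run else None}

out = answer_question("What is Python?")
record_feedback(run_id=out["run_id"], user_rating=5)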
from langsmith import test

@test
def test_qa_accuracy():
    result = my_qa_function("What is Python?")
    assert "programming" in result.lower()
from langsmith import evaluate

def run_evaluation():
    results = evaluate(
        my_model,
        data="regression-test-set",
        evaluators=[accuracy_evaluator]
    )
    # Fail CI if accuracy drops
    assert results.aggregate_metrics["accuracy"] >= 0.9, \
        f"Accuracy {results.aggregate_metrics['accuracy']} below threshold"
Traces not appearing:
import os
# Ensure tracing is enabled
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "your-key"
# Verify connection
from langsmith import Client
client = Client()
print(list(client.list_projects()))  # Should print your projects
High latency from tracing:
# Enable background batching (default)
from langsmith import Client
client = Client(auto_batch_tracing=True)
# Or use sampling
os.environ["LANGSMITH_TRACING_SAMPLING_RATE"] = "0.1"
Large payloads:
# Hide sensitive/large fields
@traceable(
process_inputs=lambda x: {k: v for k, v in x.items() if k != "large_field"}
)
def my_function(data):
pass
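For a blanket policy instead of per-function hooks, the client itself can be constructed to scrub payloads on every traced run. A sketch assuming the hide_inputs / hide_outputs client options in recent SDK versions, each receiving the payload dict and returning what should be stored:

from langsmith import Client

# Keep run structure and timing, drop all payload bodies.
client = Client(
    hide_inputs=lambda inputs: {},
    hide_outputs=lambda outputs: {}
)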
Weekly Installs: 177
Repository: https://github.com/davila7/claude-code-templates
GitHub Stars: 22.6K
First Seen: Jan 21, 2026
Security Audits: Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Warn
Installed on: opencode (142), claude-code (140), gemini-cli (135), cursor (123), codex (122), github-copilot (113)