weights-and-biases by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill weights-and-biases
当你需要以下功能时,请使用 Weights & Biases (W&B):
用户:200,000+ 机器学习从业者 | GitHub Stars:10.5k+ | 集成:100+
# 安装 W&B
pip install wandb
# 登录(创建 API 密钥)
wandb login
# 或通过环境变量设置 API 密钥
export WANDB_API_KEY=your_api_key_here
import wandb
# 初始化一个运行
run = wandb.init(
project="my-project",
config={
"learning_rate": 0.001,
"epochs": 10,
"batch_size": 32,
"architecture": "ResNet50"
}
)
# 训练循环
for epoch in range(run.config.epochs):
# 你的训练代码
train_loss = train_epoch()
val_loss = validate()
# 记录指标
wandb.log({
"epoch": epoch,
"train/loss": train_loss,
"val/loss": val_loss,
"train/accuracy": train_acc,
"val/accuracy": val_acc
})
# 结束运行
wandb.finish()
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
import torch
import wandb
# 初始化
wandb.init(project="pytorch-demo", config={
"lr": 0.001,
"epochs": 10
})
# 访问配置
config = wandb.config
# 训练循环
for epoch in range(config.epochs):
for batch_idx, (data, target) in enumerate(train_loader):
# 前向传播
output = model(data)
loss = criterion(output, target)
# 反向传播
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 每 100 个批次记录一次
if batch_idx % 100 == 0:
wandb.log({
"loss": loss.item(),
"epoch": epoch,
"batch": batch_idx
})
# 保存模型
torch.save(model.state_dict(), "model.pth")
wandb.save("model.pth") # 上传到 W&B
wandb.finish()
项目:相关实验的集合
运行:训练脚本的单次执行
# 创建/使用项目
run = wandb.init(
project="image-classification",
name="resnet50-experiment-1", # 可选的运行名称
tags=["baseline", "resnet"], # 使用标签组织
notes="First baseline run" # 添加备注
)
# 每个运行都有唯一的 ID
print(f"Run ID: {run.id}")
print(f"Run URL: {run.url}")
自动追踪超参数:
config = {
# 模型架构
"model": "ResNet50",
"pretrained": True,
# 训练参数
"learning_rate": 0.001,
"batch_size": 32,
"epochs": 50,
"optimizer": "Adam",
# 数据参数
"dataset": "ImageNet",
"augmentation": "standard"
}
wandb.init(project="my-project", config=config)
# 在训练期间访问配置
lr = wandb.config.learning_rate
batch_size = wandb.config.batch_size
# 记录标量
wandb.log({"loss": 0.5, "accuracy": 0.92})
# 记录多个指标
wandb.log({
"train/loss": train_loss,
"train/accuracy": train_acc,
"val/loss": val_loss,
"val/accuracy": val_acc,
"learning_rate": current_lr,
"epoch": epoch
})
# 使用自定义 x 轴记录
wandb.log({"loss": loss}, step=global_step)
# 记录媒体(图像、音频、视频)
wandb.log({"examples": [wandb.Image(img) for img in images]})
# 记录直方图
wandb.log({"gradients": wandb.Histogram(gradients)})
# 记录表格
table = wandb.Table(columns=["id", "prediction", "ground_truth"])
wandb.log({"predictions": table})
import torch
import wandb
# 保存模型检查点
checkpoint = {
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')
# 上传到 W&B
wandb.save('checkpoint.pth')
# 或使用工件(推荐)
artifact = wandb.Artifact('model', type='model')
artifact.add_file('checkpoint.pth')
wandb.log_artifact(artifact)
自动搜索最优超参数。
sweep_config = {
'method': 'bayes', # 或 'grid', 'random'
'metric': {
'name': 'val/accuracy',
'goal': 'maximize'
},
'parameters': {
'learning_rate': {
'distribution': 'log_uniform',
'min': 1e-5,
'max': 1e-1
},
'batch_size': {
'values': [16, 32, 64, 128]
},
'optimizer': {
'values': ['adam', 'sgd', 'rmsprop']
},
'dropout': {
'distribution': 'uniform',
'min': 0.1,
'max': 0.5
}
}
}
# 初始化扫描
sweep_id = wandb.sweep(sweep_config, project="my-project")
def train():
# 初始化运行
run = wandb.init()
# 访问扫描参数
lr = wandb.config.learning_rate
batch_size = wandb.config.batch_size
optimizer_name = wandb.config.optimizer
# 使用扫描配置构建模型
model = build_model(wandb.config)
optimizer = get_optimizer(optimizer_name, lr)
# 训练循环
for epoch in range(NUM_EPOCHS):
train_loss = train_epoch(model, optimizer, batch_size)
val_acc = validate(model)
# 记录指标
wandb.log({
"train/loss": train_loss,
"val/accuracy": val_acc
})
# 运行扫描
wandb.agent(sweep_id, function=train, count=50) # 运行 50 次试验
# 网格搜索 - 穷举法
sweep_config = {
'method': 'grid',
'parameters': {
'lr': {'values': [0.001, 0.01, 0.1]},
'batch_size': {'values': [16, 32, 64]}
}
}
# 随机搜索
sweep_config = {
'method': 'random',
'parameters': {
'lr': {'distribution': 'uniform', 'min': 0.0001, 'max': 0.1},
'dropout': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5}
}
}
# 贝叶斯优化(推荐)
sweep_config = {
'method': 'bayes',
'metric': {'name': 'val/loss', 'goal': 'minimize'},
'parameters': {
'lr': {'distribution': 'log_uniform', 'min': 1e-5, 'max': 1e-1}
}
}
通过谱系追踪数据集、模型和其他文件。
# 创建工件
artifact = wandb.Artifact(
name='training-dataset',
type='dataset',
description='ImageNet training split',
metadata={'size': '1.2M images', 'split': 'train'}
)
# 添加文件
artifact.add_file('data/train.csv')
artifact.add_dir('data/images/')
# 记录工件
wandb.log_artifact(artifact)
# 下载并使用工件
run = wandb.init(project="my-project")
# 下载工件
artifact = run.use_artifact('training-dataset:latest')
artifact_dir = artifact.download()
# 使用数据
data = load_data(f"{artifact_dir}/train.csv")
# 将模型记录为工件
model_artifact = wandb.Artifact(
name='resnet50-model',
type='model',
metadata={'architecture': 'ResNet50', 'accuracy': 0.95}
)
model_artifact.add_file('model.pth')
wandb.log_artifact(model_artifact, aliases=['best', 'production'])
# 链接到模型注册表
run.link_artifact(model_artifact, 'model-registry/production-models')
from transformers import Trainer, TrainingArguments
import wandb
# 初始化 W&B
wandb.init(project="hf-transformers")
# 包含 W&B 的训练参数
training_args = TrainingArguments(
output_dir="./results",
report_to="wandb", # 启用 W&B 记录
run_name="bert-finetuning",
logging_steps=100,
save_steps=500
)
# Trainer 自动记录到 W&B
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset
)
trainer.train()
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
import wandb
# 创建 W&B 记录器
wandb_logger = WandbLogger(
project="lightning-demo",
log_model=True # 记录模型检查点
)
# 与 Trainer 一起使用
trainer = Trainer(
logger=wandb_logger,
max_epochs=10
)
trainer.fit(model, datamodule=dm)
import wandb
from wandb.keras import WandbCallback
# 初始化
wandb.init(project="keras-demo")
# 添加回调
model.fit(
x_train, y_train,
validation_data=(x_val, y_val),
epochs=10,
callbacks=[WandbCallback()] # 自动记录指标
)
# 记录自定义可视化
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(x, y)
wandb.log({"custom_plot": wandb.Image(fig)})
# 记录混淆矩阵
wandb.log({"conf_mat": wandb.plot.confusion_matrix(
probs=None,
y_true=ground_truth,
preds=predictions,
class_names=class_names
)})
在 W&B UI 中创建可共享的报告:
wandb.init(
project="my-project",
tags=["baseline", "resnet50", "imagenet"],
group="resnet-experiments", # 分组相关运行
job_type="train" # 作业类型
)
# 记录系统指标
wandb.log({
"gpu/util": gpu_utilization,
"gpu/memory": gpu_memory_used,
"cpu/util": cpu_utilization
})
# 记录代码版本
wandb.log({"git_commit": git_commit_hash})
# 记录数据划分
wandb.log({
"data/train_size": len(train_dataset),
"data/val_size": len(val_dataset)
})
# ✅ 良好:描述性的运行名称
wandb.init(
project="nlp-classification",
name="bert-base-lr0.001-bs32-epoch10"
)
# ❌ 不佳:通用名称
wandb.init(project="nlp", name="run1")
# 保存最终模型
artifact = wandb.Artifact('final-model', type='model')
artifact.add_file('model.pth')
wandb.log_artifact(artifact)
# 保存预测结果以供分析
predictions_table = wandb.Table(
columns=["id", "input", "prediction", "ground_truth"],
data=predictions_data
)
wandb.log({"predictions": predictions_table})
import os
# 启用离线模式
os.environ["WANDB_MODE"] = "offline"
wandb.init(project="my-project")
# ... 你的代码 ...
# 稍后同步
# wandb sync <run_directory>
# 运行可以通过 URL 自动共享
run = wandb.init(project="team-project")
print(f"分享此 URL: {run.url}")
references/sweeps.md - 全面的超参数优化指南
references/artifacts.md - 数据和模型版本控制模式
references/integrations.md - 框架特定示例
每周安装次数
185
仓库
GitHub Stars
23.4K
首次出现
2026年1月21日
安全审计
安装于
opencode: 154
claude-code: 153
gemini-cli: 148
cursor: 137
codex: 134
github-copilot: 125
Use Weights & Biases (W&B) when you need to:
Users: 200,000+ ML practitioners | GitHub Stars: 10.5k+ | Integrations: 100+
# Install W&B
pip install wandb
# Login (creates API key)
wandb login
# Or set the API key via an environment variable
export WANDB_API_KEY=your_api_key_here
import wandb
# Initialize a run
run = wandb.init(
project="my-project",
config={
"learning_rate": 0.001,
"epochs": 10,
"batch_size": 32,
"architecture": "ResNet50"
}
)
# Training loop
for epoch in range(run.config.epochs):
# Your training code
train_loss = train_epoch()
val_loss = validate()
# Log metrics
wandb.log({
"epoch": epoch,
"train/loss": train_loss,
"val/loss": val_loss,
"train/accuracy": train_acc,
"val/accuracy": val_acc
})
# Finish the run
wandb.finish()
import torch
import wandb
# Initialize
wandb.init(project="pytorch-demo", config={
"lr": 0.001,
"epochs": 10
})
# Access config
config = wandb.config
# Training loop
for epoch in range(config.epochs):
for batch_idx, (data, target) in enumerate(train_loader):
# Forward pass
output = model(data)
loss = criterion(output, target)
# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Log every 100 batches
if batch_idx % 100 == 0:
wandb.log({
"loss": loss.item(),
"epoch": epoch,
"batch": batch_idx
})
# Save model
torch.save(model.state_dict(), "model.pth")
wandb.save("model.pth") # Upload to W&B
wandb.finish()
Project: Collection of related experiments
Run: Single execution of your training script
# Create/use project
run = wandb.init(
project="image-classification",
name="resnet50-experiment-1", # Optional run name
tags=["baseline", "resnet"], # Organize with tags
notes="First baseline run" # Add notes
)
# Each run has unique ID
print(f"Run ID: {run.id}")
print(f"Run URL: {run.url}")
Track hyperparameters automatically:
config = {
# Model architecture
"model": "ResNet50",
"pretrained": True,
# Training params
"learning_rate": 0.001,
"batch_size": 32,
"epochs": 50,
"optimizer": "Adam",
# Data params
"dataset": "ImageNet",
"augmentation": "standard"
}
wandb.init(project="my-project", config=config)
# Access config during training
lr = wandb.config.learning_rate
batch_size = wandb.config.batch_size
# Log scalars
wandb.log({"loss": 0.5, "accuracy": 0.92})
# Log multiple metrics
wandb.log({
"train/loss": train_loss,
"train/accuracy": train_acc,
"val/loss": val_loss,
"val/accuracy": val_acc,
"learning_rate": current_lr,
"epoch": epoch
})
# Log with custom x-axis
wandb.log({"loss": loss}, step=global_step)
# Log media (images, audio, video)
wandb.log({"examples": [wandb.Image(img) for img in images]})
# Log histograms
wandb.log({"gradients": wandb.Histogram(gradients)})
# Log tables
table = wandb.Table(columns=["id", "prediction", "ground_truth"])
wandb.log({"predictions": table})
import torch
import wandb
# Save model checkpoint
checkpoint = {
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')
# Upload to W&B
wandb.save('checkpoint.pth')
# Or use Artifacts (recommended)
artifact = wandb.Artifact('model', type='model')
artifact.add_file('checkpoint.pth')
wandb.log_artifact(artifact)
Automatically search for optimal hyperparameters.
sweep_config = {
'method': 'bayes', # or 'grid', 'random'
'metric': {
'name': 'val/accuracy',
'goal': 'maximize'
},
'parameters': {
'learning_rate': {
'distribution': 'log_uniform',
'min': 1e-5,
'max': 1e-1
},
'batch_size': {
'values': [16, 32, 64, 128]
},
'optimizer': {
'values': ['adam', 'sgd', 'rmsprop']
},
'dropout': {
'distribution': 'uniform',
'min': 0.1,
'max': 0.5
}
}
}
# Initialize sweep
sweep_id = wandb.sweep(sweep_config, project="my-project")
def train():
# Initialize run
run = wandb.init()
# Access sweep parameters
lr = wandb.config.learning_rate
batch_size = wandb.config.batch_size
optimizer_name = wandb.config.optimizer
# Build model with sweep config
model = build_model(wandb.config)
optimizer = get_optimizer(optimizer_name, lr)
# Training loop
for epoch in range(NUM_EPOCHS):
train_loss = train_epoch(model, optimizer, batch_size)
val_acc = validate(model)
# Log metrics
wandb.log({
"train/loss": train_loss,
"val/accuracy": val_acc
})
# Run sweep
wandb.agent(sweep_id, function=train, count=50) # Run 50 trials
# Grid search - exhaustive
sweep_config = {
'method': 'grid',
'parameters': {
'lr': {'values': [0.001, 0.01, 0.1]},
'batch_size': {'values': [16, 32, 64]}
}
}
# Random search
sweep_config = {
'method': 'random',
'parameters': {
'lr': {'distribution': 'uniform', 'min': 0.0001, 'max': 0.1},
'dropout': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5}
}
}
# Bayesian optimization (recommended)
sweep_config = {
'method': 'bayes',
'metric': {'name': 'val/loss', 'goal': 'minimize'},
'parameters': {
'lr': {'distribution': 'log_uniform', 'min': 1e-5, 'max': 1e-1}
}
}
Track datasets, models, and other files with lineage.
# Create artifact
artifact = wandb.Artifact(
name='training-dataset',
type='dataset',
description='ImageNet training split',
metadata={'size': '1.2M images', 'split': 'train'}
)
# Add files
artifact.add_file('data/train.csv')
artifact.add_dir('data/images/')
# Log artifact
wandb.log_artifact(artifact)
# Download and use artifact
run = wandb.init(project="my-project")
# Download artifact
artifact = run.use_artifact('training-dataset:latest')
artifact_dir = artifact.download()
# Use the data
data = load_data(f"{artifact_dir}/train.csv")
# Log model as artifact
model_artifact = wandb.Artifact(
name='resnet50-model',
type='model',
metadata={'architecture': 'ResNet50', 'accuracy': 0.95}
)
model_artifact.add_file('model.pth')
wandb.log_artifact(model_artifact, aliases=['best', 'production'])
# Link to model registry
run.link_artifact(model_artifact, 'model-registry/production-models')
from transformers import Trainer, TrainingArguments
import wandb
# Initialize W&B
wandb.init(project="hf-transformers")
# Training arguments with W&B
training_args = TrainingArguments(
output_dir="./results",
report_to="wandb", # Enable W&B logging
run_name="bert-finetuning",
logging_steps=100,
save_steps=500
)
# Trainer automatically logs to W&B
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset
)
trainer.train()
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
import wandb
# Create W&B logger
wandb_logger = WandbLogger(
project="lightning-demo",
log_model=True # Log model checkpoints
)
# Use with Trainer
trainer = Trainer(
logger=wandb_logger,
max_epochs=10
)
trainer.fit(model, datamodule=dm)
import wandb
from wandb.keras import WandbCallback
# Initialize
wandb.init(project="keras-demo")
# Add callback
model.fit(
x_train, y_train,
validation_data=(x_val, y_val),
epochs=10,
callbacks=[WandbCallback()] # Auto-logs metrics
)
# Log custom visualizations
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(x, y)
wandb.log({"custom_plot": wandb.Image(fig)})
# Log confusion matrix
wandb.log({"conf_mat": wandb.plot.confusion_matrix(
probs=None,
y_true=ground_truth,
preds=predictions,
class_names=class_names
)})
Create shareable reports in W&B UI:
wandb.init(
project="my-project",
tags=["baseline", "resnet50", "imagenet"],
group="resnet-experiments", # Group related runs
job_type="train" # Type of job
)
# Log system metrics
wandb.log({
"gpu/util": gpu_utilization,
"gpu/memory": gpu_memory_used,
"cpu/util": cpu_utilization
})
# Log code version
wandb.log({"git_commit": git_commit_hash})
# Log data splits
wandb.log({
"data/train_size": len(train_dataset),
"data/val_size": len(val_dataset)
})
# ✅ Good: Descriptive run names
wandb.init(
project="nlp-classification",
name="bert-base-lr0.001-bs32-epoch10"
)
# ❌ Bad: Generic names
wandb.init(project="nlp", name="run1")
# Save final model
artifact = wandb.Artifact('final-model', type='model')
artifact.add_file('model.pth')
wandb.log_artifact(artifact)
# Save predictions for analysis
predictions_table = wandb.Table(
columns=["id", "input", "prediction", "ground_truth"],
data=predictions_data
)
wandb.log({"predictions": predictions_table})
import os
# Enable offline mode
os.environ["WANDB_MODE"] = "offline"
wandb.init(project="my-project")
# ... your code ...
# Sync later
# wandb sync <run_directory>
# Runs are automatically shareable via URL
run = wandb.init(project="team-project")
print(f"Share this URL: {run.url}")
references/sweeps.md - Comprehensive hyperparameter optimization guide
references/artifacts.md - Data and model versioning patterns
references/integrations.md - Framework-specific examples
Weekly Installs
185
Repository
GitHub Stars
23.4K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust Hub: Pass | Socket: Pass | Snyk: Warn
Installed on
opencode: 154
claude-code: 153
gemini-cli: 148
cursor: 137
codex: 134
github-copilot: 125
高级提示工程模式:最大化LLM性能的5大核心技术与模板系统
402 周安装
隐私政策生成器 - 专业数据隐私合规专家,起草全面合规的隐私政策
410 周安装
App Store Connect 发布流程自动化工具:asc-release-flow 使用指南
412 周安装
asc-id-resolver:App Store Connect ID 解析工具,快速获取应用、构建、版本等ID
405 周安装
钉钉机器人消息桥接工具 - 通过WebSocket连接钉钉与Clawdbot AI智能体
404 周安装
Zustand适配器:为json-render提供状态管理后端,支持嵌套切片与Zustand v5+
410 周安装