audiocraft-audio-generation by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill audiocraft-audio-generation

使用 Meta 的 AudioCraft 进行文本到音乐和文本到音频生成的综合指南,涵盖 MusicGen、AudioGen 和 EnCodec。
在以下情况下使用 AudioCraft:
主要特性:
使用替代方案的情况:
# 从 PyPI 安装
pip install audiocraft
# 从 GitHub 安装(最新版)
pip install git+https://github.com/facebookresearch/audiocraft.git
# 或者使用 HuggingFace Transformers
pip install transformers torch torchaudio
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
import torchaudio
from audiocraft.models import MusicGen

# Load the small MusicGen checkpoint (downloaded on first use).
model = MusicGen.get_pretrained('facebook/musicgen-small')

# Set generation parameters.
model.set_generation_params(
    duration=8,       # seconds of audio to generate
    top_k=250,        # top-k sampling
    temperature=1.0   # sampling temperature
)

# Generate from a text description.
descriptions = ["欢快活泼的电子舞曲,带有合成器音效"]
wav = model.generate(descriptions)

# Save the first clip; MusicGen outputs 32 kHz audio.
torchaudio.save("output.wav", wav[0].cpu(), sample_rate=32000)
# Alternative route: MusicGen via HuggingFace Transformers.
from transformers import AutoProcessor, MusicgenForConditionalGeneration
import scipy.io.wavfile  # explicit submodule import: bare `import scipy` is not guaranteed to expose scipy.io.wavfile
import torch

# Load model and processor.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
# Fall back to CPU when CUDA is unavailable instead of crashing.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Tokenize the text prompt and move tensors to the same device as the model.
inputs = processor(
    text=["80 年代流行音乐,带有低音鼓和合成器"],
    padding=True,
    return_tensors="pt"
).to(device)

# Generate audio tokens; max_new_tokens bounds the clip length.
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=256
)

# Save at the audio codec's native sampling rate.
sampling_rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write("output.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
import torchaudio  # was missing: torchaudio.save is used below
from audiocraft.models import AudioGen

# Load AudioGen (text-to-sound-effects model).
model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=5)  # seconds

# Generate a sound effect from a text description.
descriptions = ["公园里的狗叫声伴随着鸟鸣"]
wav = model.generate(descriptions)

# AudioGen outputs 16 kHz audio.
torchaudio.save("sound.wav", wav[0].cpu(), sample_rate=16000)
AudioCraft Architecture:
┌──────────────────────────────────────────────────────────────┐
│ Text Encoder (T5) │
│ │ │
│ Text Embeddings │
└────────────────────────┬─────────────────────────────────────┘
│
┌────────────────────────▼─────────────────────────────────────┐
│ Transformer Decoder (LM) │
│ Auto-regressively generates audio tokens │
│ Using efficient token interleaving patterns │
└────────────────────────┬─────────────────────────────────────┘
│
┌────────────────────────▼─────────────────────────────────────┐
│ EnCodec Audio Decoder │
│ Converts tokens back to audio waveform │
└──────────────────────────────────────────────────────────────┘
| 模型 | 大小 | 描述 | 使用场景 |
|---|---|---|---|
musicgen-small | 300M | 文本到音乐 | 快速生成 |
musicgen-medium | 1.5B | 文本到音乐 | 平衡性能 |
musicgen-large | 3.3B | 文本到音乐 | 最佳质量 |
musicgen-melody | 1.5B | 文本 + 旋律 | 旋律条件 |
musicgen-melody-large | 3.3B | 文本 + 旋律 | 最佳旋律 |
musicgen-stereo-* | 可变 | 立体声输出 | 立体声生成 |
musicgen-style | 1.5B | 风格迁移 | 基于参考 |
audiogen-medium | 1.5B | 文本到声音 | 音效 |
| 参数 | 默认值 | 描述 |
|---|---|---|
duration | 8.0 | 时长(秒)(1-120) |
top_k | 250 | Top-k 采样 |
top_p | 0.0 | 核心采样(0 = 禁用) |
temperature | 1.0 | 采样温度 |
cfg_coef | 3.0 | 分类器自由引导 |
from audiocraft.models import MusicGen
import torchaudio

# Load the mid-sized (1.5B parameter) text-to-music model.
model = MusicGen.get_pretrained('facebook/musicgen-medium')

# Configure generation parameters.
model.set_generation_params(
    duration=30,      # up to 30 seconds
    top_k=250,        # sampling diversity
    top_p=0.0,        # 0 = use top_k only
    temperature=1.0,  # creativity (higher = more varied)
    cfg_coef=3.0      # text adherence (higher = stricter)
)

# Generate several samples in a single batch, one per description.
descriptions = [
    "史诗般的管弦乐配乐,带有弦乐和铜管乐",
    "轻松的 Lo-Fi 嘻哈节奏,带有爵士钢琴",
    "充满活力的摇滚歌曲,带有电吉他"
]

# Generate (returns [batch, channels, samples]).
wav = model.generate(descriptions)

# Save each clip separately; MusicGen outputs 32 kHz audio.
for i, audio in enumerate(wav):
    torchaudio.save(f"music_{i}.wav", audio.cpu(), sample_rate=32000)
from audiocraft.models import MusicGen
import torchaudio

# Load the melody-conditioned MusicGen variant.
model = MusicGen.get_pretrained('facebook/musicgen-melody')
model.set_generation_params(duration=30)

# Load the reference melody audio.
melody, sr = torchaudio.load("melody.wav")

# Generate audio that follows the reference melody (chroma conditioning).
descriptions = ["原声吉他民谣歌曲"]
wav = model.generate_with_chroma(descriptions, melody, sr)
torchaudio.save("melody_conditioned.wav", wav[0].cpu(), sample_rate=32000)
import torchaudio  # was missing: torchaudio.save is used below
from audiocraft.models import MusicGen

# Load the stereo model variant.
model = MusicGen.get_pretrained('facebook/musicgen-stereo-medium')
model.set_generation_params(duration=15)
descriptions = ["环境电子音乐,带有宽广的立体声平移效果"]
wav = model.generate(descriptions)
# wav shape: [batch, 2, samples] — two channels for stereo.
print(f"Stereo shape: {wav.shape}")  # [1, 2, 480000]
torchaudio.save("stereo.wav", wav[0].cpu(), sample_rate=32000)
from transformers import AutoProcessor, MusicgenForConditionalGeneration

processor = AutoProcessor.from_pretrained("facebook/musicgen-medium")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-medium")

# Load the audio clip to continue.
import torchaudio
audio, sr = torchaudio.load("intro.wav")

# Feed both the text prompt and the existing audio to the processor.
inputs = processor(
    audio=audio.squeeze().numpy(),
    sampling_rate=sr,
    text=["延续一个史诗般的合唱部分"],
    padding=True,
    return_tensors="pt"
)

# Generate the continuation; a larger max_new_tokens yields a longer clip.
audio_values = model.generate(**inputs, do_sample=True, guidance_scale=3, max_new_tokens=512)
import torchaudio  # was missing: torchaudio.load is used below
from audiocraft.models import MusicGen

# Load the style-transfer model.
model = MusicGen.get_pretrained('facebook/musicgen-style')

# Configure generation with style conditioning.
model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=5.0  # style influence
)

# Configure the style conditioner.
model.set_style_conditioner_params(
    eval_q=3,           # RVQ quantizers (1-6)
    excerpt_length=3.0  # style excerpt length in seconds
)

# Load the style reference audio.
style_audio, sr = torchaudio.load("reference_style.wav")

# Generate with text + style.
descriptions = ["欢快的舞曲"]
wav = model.generate_with_style(descriptions, style_audio, sr)

# Generate music matching the style without a text prompt.
model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=None  # disable double CFG for style-only generation
)
wav = model.generate_with_style([None], style_audio, sr)
from audiocraft.models import AudioGen
import torchaudio

model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=10)  # seconds

# Generate several sound effects in one batch, one per description.
descriptions = [
    "伴有大雨和闪电的雷暴",
    "繁忙的城市交通,伴有汽车喇叭声",
    "海浪拍打岩石",
    "森林中噼啪作响的篝火"
]
wav = model.generate(descriptions)

# AudioGen outputs 16 kHz audio.
for i, audio in enumerate(wav):
    torchaudio.save(f"sound_{i}.wav", audio.cpu(), sample_rate=16000)
from audiocraft.models import CompressionModel
import torch
import torchaudio

# Load the EnCodec neural audio codec.
model = CompressionModel.get_pretrained('facebook/encodec_32khz')

# Load audio from disk.
wav, sr = torchaudio.load("audio.wav")

# Resample to the codec's expected 32 kHz rate if needed.
if sr != 32000:
    resampler = torchaudio.transforms.Resample(sr, 32000)
    wav = resampler(wav)

# Encode the waveform into discrete tokens (add a batch dimension first).
with torch.no_grad():
    encoded = model.encode(wav.unsqueeze(0))
    codes = encoded[0]  # audio codes

# Decode the tokens back into a waveform.
with torch.no_grad():
    decoded = model.decode(codes)
torchaudio.save("reconstructed.wav", decoded[0].cpu(), sample_rate=32000)
import torch
import torchaudio
from audiocraft.models import MusicGen


class MusicGenerator:
    """Thin convenience wrapper around a pretrained MusicGen model."""

    def __init__(self, model_name="facebook/musicgen-medium"):
        self.model = MusicGen.get_pretrained(model_name)
        self.sample_rate = 32000  # MusicGen's native output rate

    def generate(self, prompt, duration=30, temperature=1.0, cfg=3.0):
        """Generate a single clip for *prompt*; returns a CPU tensor."""
        params = dict(duration=duration, top_k=250,
                      temperature=temperature, cfg_coef=cfg)
        self.model.set_generation_params(**params)
        with torch.no_grad():
            batch = self.model.generate([prompt])
        return batch[0].cpu()

    def generate_batch(self, prompts, duration=30):
        """Generate one clip per prompt; returns the whole batch on CPU."""
        self.model.set_generation_params(duration=duration)
        with torch.no_grad():
            batch = self.model.generate(prompts)
        return batch.cpu()

    def save(self, audio, path):
        """Write *audio* to *path* as WAV at the model's sample rate."""
        torchaudio.save(path, audio, sample_rate=self.sample_rate)


# Usage
generator = MusicGenerator()
audio = generator.generate(
    "史诗般的电影管弦乐音乐",
    duration=30,
    temperature=1.0
)
generator.save(audio, "epic_music.wav")
import json
from pathlib import Path
from audiocraft.models import AudioGen
import torchaudio
def batch_generate_sounds(sound_specs, output_dir):
    """Generate a batch of sound effects from specifications.

    Args:
        sound_specs: list of dicts {"name": str, "description": str,
            "duration": float}; duration defaults to 5 seconds.
        output_dir: output directory path; created if missing.

    Returns:
        list of dicts describing each generated file (name, path, description).
    """
    model = AudioGen.get_pretrained('facebook/audiogen-medium')
    output_dir = Path(output_dir)
    # parents=True: also create missing intermediate directories —
    # plain mkdir(exist_ok=True) raises FileNotFoundError for nested paths.
    output_dir.mkdir(parents=True, exist_ok=True)
    results = []
    for spec in sound_specs:
        model.set_generation_params(duration=spec.get("duration", 5))
        wav = model.generate([spec["description"]])
        output_path = output_dir / f"{spec['name']}.wav"
        # AudioGen outputs 16 kHz audio.
        torchaudio.save(str(output_path), wav[0].cpu(), sample_rate=16000)
        results.append({
            "name": spec["name"],
            "path": str(output_path),
            "description": spec["description"]
        })
    return results


# Usage
sounds = [
    {"name": "explosion", "description": "巨大的爆炸声,伴有碎片", "duration": 3},
    {"name": "footsteps", "description": "木地板上的脚步声", "duration": 5},
    {"name": "door", "description": "木门吱呀作响并关闭", "duration": 2}
]
results = batch_generate_sounds(sounds, "sound_effects/")
import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen

# The model is loaded once at startup and shared across requests.
model = MusicGen.get_pretrained('facebook/musicgen-small')


def generate_music(prompt, duration, temperature, cfg_coef):
    """Generate one clip for the UI and return the output file path."""
    model.set_generation_params(duration=duration,
                                temperature=temperature,
                                cfg_coef=cfg_coef)
    with torch.no_grad():
        batch = model.generate([prompt])
    # Persist to a temp file so the Gradio Audio component can serve it.
    path = "temp_output.wav"
    torchaudio.save(path, batch[0].cpu(), sample_rate=32000)
    return path


demo = gr.Interface(
    fn=generate_music,
    inputs=[
        gr.Textbox(label="音乐描述", placeholder="欢快的电子舞曲"),
        gr.Slider(1, 30, value=8, label="时长(秒)"),
        gr.Slider(0.5, 2.0, value=1.0, label="温度"),
        gr.Slider(1.0, 10.0, value=3.0, label="CFG 系数"),
    ],
    outputs=gr.Audio(label="生成的音乐"),
    title="MusicGen 演示",
)
demo.launch()
# Tip: use a smaller model to reduce VRAM usage.
model = MusicGen.get_pretrained('facebook/musicgen-small')

# Tip: clear cached GPU memory between generations.
torch.cuda.empty_cache()

# Tip: generate shorter durations.
model.set_generation_params(duration=10)  # instead of 30

# Tip: use half precision to roughly halve VRAM usage.
model = model.half()

# Tip: batch several prompts in one call (more efficient).
descriptions = ["prompt1", "prompt2", "prompt3", "prompt4"]
wav = model.generate(descriptions)  # single batch
# instead of
for desc in descriptions:
    wav = model.generate([desc])  # multiple batches (slower)
| 模型 | FP32 VRAM | FP16 VRAM |
|---|---|---|
| musicgen-small | ~4GB | ~2GB |
| musicgen-medium | ~8GB | ~4GB |
| musicgen-large | ~16GB | ~8GB |
| 问题 | 解决方案 |
|---|---|
| CUDA 内存不足 | 使用较小的模型,减少时长 |
| 质量差 | 增加 cfg_coef,使用更好的提示 |
| 生成太短 | 检查最大时长设置 |
| 音频伪影 | 尝试不同的温度值 |
| 立体声不工作 | 使用立体声模型变体 |
每周安装量
264
仓库
GitHub 星标
23.4K
首次出现
2026年1月21日
安全审计
安装于
opencode222
gemini-cli209
codex201
claude-code198
cursor195
github-copilot185
Comprehensive guide to using Meta's AudioCraft for text-to-music and text-to-audio generation with MusicGen, AudioGen, and EnCodec.
Use AudioCraft when:
Key features:
Use alternatives instead:
# From PyPI
pip install audiocraft
# From GitHub (latest)
pip install git+https://github.com/facebookresearch/audiocraft.git
# Or use HuggingFace Transformers
pip install transformers torch torchaudio
import torchaudio
from audiocraft.models import MusicGen

# Load the small MusicGen checkpoint (downloaded on first use).
model = MusicGen.get_pretrained('facebook/musicgen-small')

# Set generation parameters.
model.set_generation_params(
    duration=8,       # seconds of audio to generate
    top_k=250,        # top-k sampling
    temperature=1.0   # sampling temperature
)

# Generate from a text description.
descriptions = ["happy upbeat electronic dance music with synths"]
wav = model.generate(descriptions)

# Save the first clip; MusicGen outputs 32 kHz audio.
torchaudio.save("output.wav", wav[0].cpu(), sample_rate=32000)
# Alternative route: MusicGen via HuggingFace Transformers.
from transformers import AutoProcessor, MusicgenForConditionalGeneration
import scipy.io.wavfile  # explicit submodule import: bare `import scipy` is not guaranteed to expose scipy.io.wavfile
import torch

# Load model and processor.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
# Fall back to CPU when CUDA is unavailable instead of crashing.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Tokenize the text prompt and move tensors to the same device as the model.
inputs = processor(
    text=["80s pop track with bassy drums and synth"],
    padding=True,
    return_tensors="pt"
).to(device)

# Generate audio tokens; max_new_tokens bounds the clip length.
audio_values = model.generate(
    **inputs,
    do_sample=True,
    guidance_scale=3,
    max_new_tokens=256
)

# Save at the audio codec's native sampling rate.
sampling_rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write("output.wav", rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
import torchaudio  # was missing: torchaudio.save is used below
from audiocraft.models import AudioGen

# Load AudioGen (text-to-sound-effects model).
model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=5)  # seconds

# Generate a sound effect from a text description.
descriptions = ["dog barking in a park with birds chirping"]
wav = model.generate(descriptions)

# AudioGen outputs 16 kHz audio.
torchaudio.save("sound.wav", wav[0].cpu(), sample_rate=16000)
AudioCraft Architecture:
┌──────────────────────────────────────────────────────────────┐
│ Text Encoder (T5) │
│ │ │
│ Text Embeddings │
└────────────────────────┬─────────────────────────────────────┘
│
┌────────────────────────▼─────────────────────────────────────┐
│ Transformer Decoder (LM) │
│ Auto-regressively generates audio tokens │
│ Using efficient token interleaving patterns │
└────────────────────────┬─────────────────────────────────────┘
│
┌────────────────────────▼─────────────────────────────────────┐
│ EnCodec Audio Decoder │
│ Converts tokens back to audio waveform │
└──────────────────────────────────────────────────────────────┘
| Model | Size | Description | Use Case |
|---|---|---|---|
musicgen-small | 300M | Text-to-music | Quick generation |
musicgen-medium | 1.5B | Text-to-music | Balanced |
musicgen-large | 3.3B | Text-to-music | Best quality |
musicgen-melody | 1.5B | Text + melody | Melody conditioning |
| Parameter | Default | Description |
|---|---|---|
duration | 8.0 | Length in seconds (1-120) |
top_k | 250 | Top-k sampling |
top_p | 0.0 | Nucleus sampling (0 = disabled) |
temperature | 1.0 | Sampling temperature |
cfg_coef | 3.0 | Classifier-free guidance |
from audiocraft.models import MusicGen
import torchaudio

# Load the mid-sized (1.5B parameter) text-to-music model.
model = MusicGen.get_pretrained('facebook/musicgen-medium')

# Configure generation.
model.set_generation_params(
    duration=30,      # up to 30 seconds
    top_k=250,        # sampling diversity
    top_p=0.0,        # 0 = use top_k only
    temperature=1.0,  # creativity (higher = more varied)
    cfg_coef=3.0      # text adherence (higher = stricter)
)

# Generate several samples in a single batch, one per description.
descriptions = [
    "epic orchestral soundtrack with strings and brass",
    "chill lo-fi hip hop beat with jazzy piano",
    "energetic rock song with electric guitar"
]

# Generate (returns [batch, channels, samples]).
wav = model.generate(descriptions)

# Save each clip separately; MusicGen outputs 32 kHz audio.
for i, audio in enumerate(wav):
    torchaudio.save(f"music_{i}.wav", audio.cpu(), sample_rate=32000)
from audiocraft.models import MusicGen
import torchaudio

# Load the melody-conditioned MusicGen variant.
model = MusicGen.get_pretrained('facebook/musicgen-melody')
model.set_generation_params(duration=30)

# Load the reference melody audio.
melody, sr = torchaudio.load("melody.wav")

# Generate audio that follows the reference melody (chroma conditioning).
descriptions = ["acoustic guitar folk song"]
wav = model.generate_with_chroma(descriptions, melody, sr)
torchaudio.save("melody_conditioned.wav", wav[0].cpu(), sample_rate=32000)
import torchaudio  # was missing: torchaudio.save is used below
from audiocraft.models import MusicGen

# Load the stereo model variant.
model = MusicGen.get_pretrained('facebook/musicgen-stereo-medium')
model.set_generation_params(duration=15)
descriptions = ["ambient electronic music with wide stereo panning"]
wav = model.generate(descriptions)
# wav shape: [batch, 2, samples] — two channels for stereo.
print(f"Stereo shape: {wav.shape}")  # [1, 2, 480000]
torchaudio.save("stereo.wav", wav[0].cpu(), sample_rate=32000)
from transformers import AutoProcessor, MusicgenForConditionalGeneration
import torchaudio  # moved to the top with the other imports

processor = AutoProcessor.from_pretrained("facebook/musicgen-medium")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-medium")

# Load the audio clip to continue.
audio, sr = torchaudio.load("intro.wav")

# Feed both the text prompt and the existing audio to the processor.
inputs = processor(
    audio=audio.squeeze().numpy(),
    sampling_rate=sr,
    text=["continue with an epic chorus"],  # fixed typo: "a epic" -> "an epic"
    padding=True,
    return_tensors="pt"
)

# Generate the continuation; a larger max_new_tokens yields a longer clip.
audio_values = model.generate(**inputs, do_sample=True, guidance_scale=3, max_new_tokens=512)
import torchaudio  # was missing: torchaudio.load is used below
from audiocraft.models import MusicGen

# Load the style-transfer model.
model = MusicGen.get_pretrained('facebook/musicgen-style')

# Configure generation with style conditioning.
model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=5.0  # style influence
)

# Configure the style conditioner.
model.set_style_conditioner_params(
    eval_q=3,           # RVQ quantizers (1-6)
    excerpt_length=3.0  # style excerpt length in seconds
)

# Load the style reference audio.
style_audio, sr = torchaudio.load("reference_style.wav")

# Generate with text + style.
descriptions = ["upbeat dance track"]
wav = model.generate_with_style(descriptions, style_audio, sr)

# Generate music matching the style without a text prompt.
model.set_generation_params(
    duration=30,
    cfg_coef=3.0,
    cfg_coef_beta=None  # disable double CFG for style-only generation
)
wav = model.generate_with_style([None], style_audio, sr)
from audiocraft.models import AudioGen
import torchaudio

model = AudioGen.get_pretrained('facebook/audiogen-medium')
model.set_generation_params(duration=10)  # seconds

# Generate several sound effects in one batch, one per description.
descriptions = [
    "thunderstorm with heavy rain and lightning",
    "busy city traffic with car horns",
    "ocean waves crashing on rocks",
    "crackling campfire in forest"
]
wav = model.generate(descriptions)

# AudioGen outputs 16 kHz audio.
for i, audio in enumerate(wav):
    torchaudio.save(f"sound_{i}.wav", audio.cpu(), sample_rate=16000)
from audiocraft.models import CompressionModel
import torch
import torchaudio

# Load the EnCodec neural audio codec.
model = CompressionModel.get_pretrained('facebook/encodec_32khz')

# Load audio from disk.
wav, sr = torchaudio.load("audio.wav")

# Resample to the codec's expected 32 kHz rate if needed.
if sr != 32000:
    resampler = torchaudio.transforms.Resample(sr, 32000)
    wav = resampler(wav)

# Encode the waveform into discrete tokens (add a batch dimension first).
with torch.no_grad():
    encoded = model.encode(wav.unsqueeze(0))
    codes = encoded[0]  # audio codes

# Decode the tokens back into a waveform.
with torch.no_grad():
    decoded = model.decode(codes)
torchaudio.save("reconstructed.wav", decoded[0].cpu(), sample_rate=32000)
import torch
import torchaudio
from audiocraft.models import MusicGen


class MusicGenerator:
    """Thin convenience wrapper around a pretrained MusicGen model."""

    def __init__(self, model_name="facebook/musicgen-medium"):
        self.model = MusicGen.get_pretrained(model_name)
        self.sample_rate = 32000  # MusicGen's native output rate

    def generate(self, prompt, duration=30, temperature=1.0, cfg=3.0):
        """Generate a single clip for *prompt*; returns a CPU tensor."""
        params = dict(duration=duration, top_k=250,
                      temperature=temperature, cfg_coef=cfg)
        self.model.set_generation_params(**params)
        with torch.no_grad():
            batch = self.model.generate([prompt])
        return batch[0].cpu()

    def generate_batch(self, prompts, duration=30):
        """Generate one clip per prompt; returns the whole batch on CPU."""
        self.model.set_generation_params(duration=duration)
        with torch.no_grad():
            batch = self.model.generate(prompts)
        return batch.cpu()

    def save(self, audio, path):
        """Write *audio* to *path* as WAV at the model's sample rate."""
        torchaudio.save(path, audio, sample_rate=self.sample_rate)


# Usage
generator = MusicGenerator()
audio = generator.generate(
    "epic cinematic orchestral music",
    duration=30,
    temperature=1.0
)
generator.save(audio, "epic_music.wav")
import json
from pathlib import Path
from audiocraft.models import AudioGen
import torchaudio
def batch_generate_sounds(sound_specs, output_dir):
    """Generate a batch of sound effects from specifications.

    Args:
        sound_specs: list of dicts {"name": str, "description": str,
            "duration": float}; duration defaults to 5 seconds.
        output_dir: output directory path; created if missing.

    Returns:
        list of dicts describing each generated file (name, path, description).
    """
    model = AudioGen.get_pretrained('facebook/audiogen-medium')
    output_dir = Path(output_dir)
    # parents=True: also create missing intermediate directories —
    # plain mkdir(exist_ok=True) raises FileNotFoundError for nested paths.
    output_dir.mkdir(parents=True, exist_ok=True)
    results = []
    for spec in sound_specs:
        model.set_generation_params(duration=spec.get("duration", 5))
        wav = model.generate([spec["description"]])
        output_path = output_dir / f"{spec['name']}.wav"
        # AudioGen outputs 16 kHz audio.
        torchaudio.save(str(output_path), wav[0].cpu(), sample_rate=16000)
        results.append({
            "name": spec["name"],
            "path": str(output_path),
            "description": spec["description"]
        })
    return results


# Usage
sounds = [
    {"name": "explosion", "description": "massive explosion with debris", "duration": 3},
    {"name": "footsteps", "description": "footsteps on wooden floor", "duration": 5},
    {"name": "door", "description": "wooden door creaking and closing", "duration": 2}
]
results = batch_generate_sounds(sounds, "sound_effects/")
import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen

# The model is loaded once at startup and shared across requests.
model = MusicGen.get_pretrained('facebook/musicgen-small')


def generate_music(prompt, duration, temperature, cfg_coef):
    """Generate one clip for the UI and return the output file path."""
    model.set_generation_params(duration=duration,
                                temperature=temperature,
                                cfg_coef=cfg_coef)
    with torch.no_grad():
        batch = model.generate([prompt])
    # Persist to a temp file so the Gradio Audio component can serve it.
    path = "temp_output.wav"
    torchaudio.save(path, batch[0].cpu(), sample_rate=32000)
    return path


demo = gr.Interface(
    fn=generate_music,
    inputs=[
        gr.Textbox(label="Music Description", placeholder="upbeat electronic dance music"),
        gr.Slider(1, 30, value=8, label="Duration (seconds)"),
        gr.Slider(0.5, 2.0, value=1.0, label="Temperature"),
        gr.Slider(1.0, 10.0, value=3.0, label="CFG Coefficient"),
    ],
    outputs=gr.Audio(label="Generated Music"),
    title="MusicGen Demo",
)
demo.launch()
# Tip: use a smaller model to reduce VRAM usage.
model = MusicGen.get_pretrained('facebook/musicgen-small')

# Tip: clear cached GPU memory between generations.
torch.cuda.empty_cache()

# Tip: generate shorter durations.
model.set_generation_params(duration=10)  # Instead of 30

# Tip: use half precision to roughly halve VRAM usage.
model = model.half()

# Tip: batch several prompts in one call (more efficient).
descriptions = ["prompt1", "prompt2", "prompt3", "prompt4"]
wav = model.generate(descriptions)  # Single batch
# Instead of
for desc in descriptions:
    wav = model.generate([desc])  # Multiple batches (slower)
| Model | FP32 VRAM | FP16 VRAM |
|---|---|---|
| musicgen-small | ~4GB | ~2GB |
| musicgen-medium | ~8GB | ~4GB |
| musicgen-large | ~16GB | ~8GB |
| Issue | Solution |
|---|---|
| CUDA OOM | Use smaller model, reduce duration |
| Poor quality | Increase cfg_coef, better prompts |
| Generation too short | Check max duration setting |
| Audio artifacts | Try different temperature |
| Stereo not working | Use stereo model variant |
Weekly Installs
264
Repository
GitHub Stars
23.4K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
opencode222
gemini-cli209
codex201
claude-code198
cursor195
github-copilot185
超能力技能使用指南:AI助手技能调用优先级与工作流程详解
41,800 周安装
musicgen-melody-large | 3.3B | Text + melody | Best melody |
musicgen-stereo-* | Varies | Stereo output | Stereo generation |
musicgen-style | 1.5B | Style transfer | Reference-based |
audiogen-medium | 1.5B | Text-to-sound | Sound effects |