npx skills add https://github.com/doanchienthangdev/omgkit --skill finetuning
Adapting Foundation Models for specific tasks.
def training_memory_gb(num_params_billion, precision="fp16"):
    """Rough full-finetuning memory estimate: weights + gradients + fp32 master copy + AdamW states (activations not included)."""
    bytes_per = {"fp32": 4, "fp16": 2, "int8": 1}
    n = num_params_billion * 1e9
    weights = n * bytes_per[precision]
    gradients = n * bytes_per[precision]
    master = n * 4 if precision != "fp32" else 0   # fp32 master copy kept for mixed precision
    optimizer = n * 4 * 2                          # AdamW first and second moments (fp32)
    return (weights + gradients + master + optimizer) / 1e9

# 7B model full finetuning: ~112 GB!
# With LoRA: ~16 GB
# With QLoRA: ~6 GB
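A quick check of the full-finetuning figure (a minimal sketch; the 7B size is illustrative and activation memory is not counted):

print(training_memory_gb(7))   # 112.0 -> roughly 112 GB for a 7B model in fp16 mixed precision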
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,                      # Rank (lower = fewer trainable params)
    lora_alpha=32,            # Scaling factor
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(base_model, config)

# ~0.06% of 7B trainable!
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
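To see the ratio behind that ~0.06% figure, compare against the total parameter count (a small sketch; PEFT's model.print_trainable_parameters() prints a similar summary):

total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,} ({trainable / total:.2%})")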
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
model = get_peft_model(model, config)   # attach the LoraConfig defined above
# 7B on a 16GB GPU!
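A rough back-of-envelope for why this fits: the 4-bit base weights take about half a byte per parameter, and only the small LoRA adapters carry gradients and optimizer states. A minimal sketch (the ~4M adapter-parameter count assumes r=8 on q_proj/v_proj of a 7B model; activations and framework overhead are not included):

def qlora_memory_gb(num_params_billion, adapter_params_million=4.2):
    base = num_params_billion * 1e9 * 0.5                     # 4-bit quantized base weights
    adapters = adapter_params_million * 1e6 * (2 + 2 + 8)     # bf16 adapter weights + grads + AdamW states
    return (base + adapters) / 1e9

print(qlora_memory_gb(7))   # ~3.55 GB of weights/states; activations push the total toward ~6 GB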
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    warmup_steps=100,
    fp16=True,
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    eval_dataset=eval_data,
)
trainer.train()
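With the settings above, gradient accumulation raises the effective batch size without the memory cost of a larger per-device batch (a quick check, single GPU assumed):

effective_batch = 4 * 4   # per_device_train_batch_size * gradient_accumulation_steps = 16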
# Merge LoRA back
merged = model.merge_and_unload()
merged.save_pretrained("./finetuned")
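Merging is optional: the adapter can also be saved on its own (a few megabytes) and re-attached to the base model at load time. A minimal sketch, assuming the paths are illustrative:

from peft import PeftModel

model.save_pretrained("./lora-adapter")                          # saves only the adapter weights
reloaded = PeftModel.from_pretrained(base_model, "./lora-adapter")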
def task_vector_merge(base, finetuned_models, scale=0.3):
    base_sd = base.state_dict()
    merged = {k: v.clone() for k, v in base_sd.items()}
    for ft in finetuned_models:
        ft_sd = ft.state_dict()
        for key in merged:
            task_vector = ft_sd[key] - base_sd[key]   # delta vs. the original base weights
            merged[key] += scale * task_vector
    return merged
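Hypothetical usage, blending two task-specialized checkpoints into the base model (math_model and code_model are illustrative names):

merged_sd = task_vector_merge(base_model, [math_model, code_model], scale=0.3)
base_model.load_state_dict(merged_sd)
base_model.save_pretrained("./merged")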
Weekly Installs: 1
Repository: https://github.com/doanchienthangdev/omgkit
GitHub Stars: 3
First Seen: 1 day ago
Security Audits: Gen Agent Trust Hub (Pass), Socket (Pass), Snyk (Pass)
Installed on: zencoder (1), amp (1), cline (1), openclaw (1), opencode (1), cursor (1)