重要前提
安装AI Skills的关键前提是:必须科学上网,且开启TUN模式,这一点至关重要,直接决定安装能否顺利完成,在此郑重提醒三遍:科学上网,科学上网,科学上网。查看完整安装教程 →
npx skills add https://github.com/89jobrien/steve --skill golang-performance此技能提供关于优化 Go 应用程序性能的指导,包括性能剖析、内存管理、并发优化以及避免常见的性能陷阱。
import (
"net/http"
_ "net/http/pprof"
)
func main() {
// pprof 端点可在 /debug/pprof/ 访问
go func() {
http.ListenAndServe("localhost:6060", nil)
}()
// 主应用程序
}
# 收集 30 秒的 CPU 性能剖析数据
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30
# 交互式命令
(pprof) top10 # 按 CPU 使用排序的前 10 个函数
(pprof) list FuncName # 显示带时间信息的源代码
(pprof) web # 在浏览器中打开火焰图
# 堆剖析
go tool pprof http://localhost:6060/debug/pprof/heap
# 分配剖析(所有分配)
go tool pprof http://localhost:6060/debug/pprof/allocs
# 交互式命令
(pprof) top10 -cum # 按累积分配排序的前 10 个
(pprof) list FuncName # 显示分配位置
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
import (
"os"
"runtime/pprof"
)
// profileCPU writes a CPU profile of the code executed between the
// Start/Stop pair into cpu.prof.
//
// NOTE(review): the os.Create and StartCPUProfile errors are silently
// discarded here for brevity; production code should check both.
func profileCPU() {
	f, _ := os.Create("cpu.prof")
	defer f.Close()
	pprof.StartCPUProfile(f)
	defer pprof.StopCPUProfile()
	// Code to profile goes here.
}

// profileMemory snapshots the live heap into mem.prof.
//
// NOTE(review): runtime.GC() requires importing "runtime", which the
// import block shown above this snippet does not list — confirm the
// full example compiles.
func profileMemory() {
	f, _ := os.Create("mem.prof")
	defer f.Close()
	runtime.GC() // Force a collection so the profile reflects live objects only.
	pprof.WriteHeapProfile(f)
}
// 不好:每次调用都分配内存
func Process(items []string) []string {
result := []string{}
for _, item := range items {
result = append(result, transform(item))
}
return result
}
// 好:使用已知容量预分配
func Process(items []string) []string {
result := make([]string, 0, len(items))
for _, item := range items {
result = append(result, transform(item))
}
return result
}
// bufferPool recycles *bytes.Buffer values so hot request paths do not
// allocate a fresh buffer per call.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// ProcessRequest runs data through a pooled scratch buffer and returns
// an independent byte slice.
//
// Fix: the original returned buf.Bytes() directly — that slice aliases
// the pooled buffer's backing array, which goes back into the pool and
// may be overwritten by the next Get. The result must be copied out
// before the buffer is returned to the pool.
func ProcessRequest(data []byte) []byte {
	buf := bufferPool.Get().(*bytes.Buffer)
	// Reset after Get: only buffers created by New are guaranteed empty.
	buf.Reset()
	defer bufferPool.Put(buf)

	// Use the buffer as scratch space.
	buf.Write(data)

	// Detach the result from the pooled buffer's backing array.
	out := make([]byte, buf.Len())
	copy(out, buf.Bytes())
	return out
}
// 不好:O(n^2) 次分配
func BuildString(parts []string) string {
result := ""
for _, part := range parts {
result += part
}
return result
}
// BuildString concatenates all parts into one string using a single
// allocation (strings.Join pre-computes the total length).
func BuildString(parts []string) string {
	return strings.Join(parts, "")
}
// 不好:保持整个底层数组存活
func GetFirst(data []byte) []byte {
return data[:10]
}
// GetFirst returns an independent copy of up to the first 10 bytes of
// data. Copying releases the (possibly large) backing array of data
// for garbage collection.
//
// Fix: the original sliced data[:10] unconditionally and panicked on
// inputs shorter than 10 bytes; the length is now clamped.
func GetFirst(data []byte) []byte {
	n := 10
	if len(data) < n {
		n = len(data)
	}
	result := make([]byte, n)
	copy(result, data[:n])
	return result
}
# 显示逃逸分析决策
go build -gcflags="-m" ./...
# 更详细的信息
go build -gcflags="-m -m" ./...
// 逃逸:返回指针
func NewUser() *User {
return &User{} // 在堆上分配
}
// 保持在栈上:返回值
func NewUser() User {
return User{} // 可能保持在栈上
}
// 逃逸:接口转换
func Process(v interface{}) { ... }
func main() {
x := 42
Process(x) // x 逃逸到堆
}
// ProcessItems processes every item on a fixed pool of `workers`
// goroutines and returns one Result per input Item.
//
// NOTE(review): results are appended in completion order, which in
// general differs from the input order of items — confirm callers do
// not rely on ordering.
func ProcessItems(items []Item, workers int) []Result {
	// Buffered to len(items) so neither the send loop nor the workers
	// can block on channel capacity.
	jobs := make(chan Item, len(items))
	results := make(chan Result, len(items))
	// Start the worker pool; each worker drains jobs until it closes.
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for item := range jobs {
				results <- process(item)
			}
		}()
	}
	// Enqueue all work, then close jobs so the workers' range loops end.
	for _, item := range items {
		jobs <- item
	}
	close(jobs)
	// Close results only after every worker is done, so the collection
	// loop below terminates.
	go func() {
		wg.Wait()
		close(results)
	}()
	var output []Result
	for r := range results {
		output = append(output, r)
	}
	return output
}
// 慢:无缓冲导致阻塞
ch := make(chan int)
// 快:缓冲区减少争用
ch := make(chan int, 100)
// 不好:全局锁
var mu sync.Mutex
var cache = make(map[string]string)
func Get(key string) string {
mu.Lock()
defer mu.Unlock()
return cache[key]
}
// GOOD: sharded locks — 256 independently locked shards selected by a
// hash of the key, so unrelated keys do not contend on a single mutex.
//
// NOTE(review): the per-shard items maps are never initialized in this
// snippet. Reads on a nil map return the zero value, but any write
// would panic — a constructor must make() each map; confirm against
// the full example.
type ShardedCache struct {
	shards [256]struct {
		mu    sync.RWMutex
		items map[string]string
	}
}

// getShard maps key onto one of the 256 shards via an FNV-1a hash.
func (c *ShardedCache) getShard(key string) *struct {
	mu    sync.RWMutex
	items map[string]string
} {
	h := fnv.New32a()
	h.Write([]byte(key))
	return &c.shards[h.Sum32()%256]
}

// Get returns the value stored under key ("" if absent), holding only
// that key's shard read lock.
func (c *ShardedCache) Get(key string) string {
	shard := c.getShard(key)
	shard.mu.RLock()
	defer shard.mu.RUnlock()
	return shard.items[key]
}
// Package-level cache. sync.Map suits keys written once and read many
// times, or goroutines operating on disjoint key sets.
var cache sync.Map

// Get reports the cached value for key and whether it was present.
func Get(key string) (string, bool) {
	if v, ok := cache.Load(key); ok {
		return v.(string), true
	}
	return "", false
}

// Set stores value under key.
func Set(key, value string) {
	cache.Store(key, value)
}
// 不好:24 字节(填充)
type Bad struct {
a bool // 1 字节 + 7 填充
b int64 // 8 字节
c bool // 1 字节 + 7 填充
}
// GOOD: 16 bytes total — ordering the largest field first leaves only
// 6 bytes of trailing padding (the original comment claimed "no
// padding", which contradicts the field comments below).
type Good struct {
	b int64 // 8 bytes
	a bool  // 1 byte
	c bool  // 1 byte + 6 trailing padding
}
// 慢:类型断言,装箱
func Sum(values []interface{}) float64 {
var sum float64
for _, v := range values {
sum += v.(float64)
}
return sum
}
// FAST: concrete element type — no interface boxing and no per-element
// type assertion.
func Sum(values []float64) float64 {
	var total float64
	for i := 0; i < len(values); i++ {
		total += values[i]
	}
	return total
}
func BenchmarkProcess(b *testing.B) {
data := generateTestData()
b.ResetTimer() // 排除设置时间
for i := 0; i < b.N; i++ {
Process(data)
}
}
// 内存基准测试
func BenchmarkAllocs(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = make([]byte, 1024)
}
}
// 比较实现
func BenchmarkComparison(b *testing.B) {
b.Run("old", func(b *testing.B) {
for i := 0; i < b.N; i++ {
OldImplementation()
}
})
b.Run("new", func(b *testing.B) {
for i := 0; i < b.N; i++ {
NewImplementation()
}
})
}
运行:
go test -bench=. -benchmem ./...
go test -bench=. -benchtime=5s ./... # 更长的运行时间
// 不好:每次迭代的 defer 开销
for _, item := range items {
mu.Lock()
defer mu.Unlock() // defer 会累积!
process(item)
}
// 好:显式解锁
for _, item := range items {
mu.Lock()
process(item)
mu.Unlock()
}
// 更好:提取到函数中
for _, item := range items {
processWithLock(item)
}
func processWithLock(item Item) {
mu.Lock()
defer mu.Unlock()
process(item)
}
// 慢:每次调用都使用反射
json.Marshal(v)
// 快:复用编码器
var buf bytes.Buffer
encoder := json.NewEncoder(&buf)
encoder.Encode(v)
// 更快:代码生成(easyjson, ffjson)
已知大小时使用 make([]T, 0, capacity);缓冲区复用使用 sync.Pool。
每周安装次数
52
仓库
GitHub 星标数
4
首次出现
2026年1月24日
安全审计
安装于
opencode45
codex41
gemini-cli41
github-copilot40
kimi-cli35
amp35
This skill provides guidance on optimizing Go application performance including profiling, memory management, concurrency optimization, and avoiding common performance pitfalls.
import (
"net/http"
_ "net/http/pprof"
)
func main() {
// pprof endpoints available at /debug/pprof/
go func() {
http.ListenAndServe("localhost:6060", nil)
}()
// Main application
}
# Collect 30-second CPU profile
go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30
# Interactive commands
(pprof) top10 # Top 10 functions by CPU
(pprof) list FuncName # Show source with timing
(pprof) web # Open flame graph in browser
# Heap profile
go tool pprof http://localhost:6060/debug/pprof/heap
# Allocs profile (all allocations)
go tool pprof http://localhost:6060/debug/pprof/allocs
# Interactive commands
(pprof) top10 -cum # Top by cumulative allocations
(pprof) list FuncName # Show allocation sites
import (
"os"
"runtime/pprof"
)
// profileCPU writes a CPU profile of the code executed between the
// Start/Stop pair into cpu.prof.
//
// NOTE(review): the os.Create and StartCPUProfile errors are silently
// discarded here for brevity; production code should check both.
func profileCPU() {
	f, _ := os.Create("cpu.prof")
	defer f.Close()
	pprof.StartCPUProfile(f)
	defer pprof.StopCPUProfile()
	// Code to profile goes here.
}

// profileMemory snapshots the live heap into mem.prof.
//
// NOTE(review): runtime.GC() requires importing "runtime", which the
// import block shown above this snippet does not list — confirm the
// full example compiles.
func profileMemory() {
	f, _ := os.Create("mem.prof")
	defer f.Close()
	runtime.GC() // Force a collection so the profile reflects live objects only.
	pprof.WriteHeapProfile(f)
}
// BAD: Allocates on every call
func Process(items []string) []string {
result := []string{}
for _, item := range items {
result = append(result, transform(item))
}
return result
}
// GOOD: Pre-allocate with known capacity
func Process(items []string) []string {
result := make([]string, 0, len(items))
for _, item := range items {
result = append(result, transform(item))
}
return result
}
// bufferPool recycles *bytes.Buffer values so hot request paths do not
// allocate a fresh buffer per call.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// ProcessRequest runs data through a pooled scratch buffer and returns
// an independent byte slice.
//
// Fix: the original returned buf.Bytes() directly — that slice aliases
// the pooled buffer's backing array, which goes back into the pool and
// may be overwritten by the next Get. The result must be copied out
// before the buffer is returned to the pool.
func ProcessRequest(data []byte) []byte {
	buf := bufferPool.Get().(*bytes.Buffer)
	// Reset after Get: only buffers created by New are guaranteed empty.
	buf.Reset()
	defer bufferPool.Put(buf)

	// Use the buffer as scratch space.
	buf.Write(data)

	// Detach the result from the pooled buffer's backing array.
	out := make([]byte, buf.Len())
	copy(out, buf.Bytes())
	return out
}
// BAD: O(n^2) allocations
func BuildString(parts []string) string {
result := ""
for _, part := range parts {
result += part
}
return result
}
// BuildString concatenates all parts into one string using a single
// allocation (strings.Join pre-computes the total length).
func BuildString(parts []string) string {
	return strings.Join(parts, "")
}
// BAD: Keeps entire backing array alive
func GetFirst(data []byte) []byte {
return data[:10]
}
// GetFirst returns an independent copy of up to the first 10 bytes of
// data. Copying releases the (possibly large) backing array of data
// for garbage collection.
//
// Fix: the original sliced data[:10] unconditionally and panicked on
// inputs shorter than 10 bytes; the length is now clamped.
func GetFirst(data []byte) []byte {
	n := 10
	if len(data) < n {
		n = len(data)
	}
	result := make([]byte, n)
	copy(result, data[:n])
	return result
}
# Show escape analysis decisions
go build -gcflags="-m" ./...
# More verbose
go build -gcflags="-m -m" ./...
// ESCAPES: Returned pointer
func NewUser() *User {
return &User{} // Allocated on heap
}
// STAYS ON STACK: Value return
func NewUser() User {
return User{} // May stay on stack
}
// ESCAPES: Interface conversion
func Process(v interface{}) { ... }
func main() {
x := 42
Process(x) // x escapes to heap
}
// ProcessItems processes every item on a fixed pool of `workers`
// goroutines and returns one Result per input Item.
//
// NOTE(review): results are appended in completion order, which in
// general differs from the input order of items — confirm callers do
// not rely on ordering.
func ProcessItems(items []Item, workers int) []Result {
	// Buffered to len(items) so neither the send loop nor the workers
	// can block on channel capacity.
	jobs := make(chan Item, len(items))
	results := make(chan Result, len(items))
	// Start the worker pool; each worker drains jobs until it closes.
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for item := range jobs {
				results <- process(item)
			}
		}()
	}
	// Enqueue all work, then close jobs so the workers' range loops end.
	for _, item := range items {
		jobs <- item
	}
	close(jobs)
	// Close results only after every worker is done, so the collection
	// loop below terminates.
	go func() {
		wg.Wait()
		close(results)
	}()
	var output []Result
	for r := range results {
		output = append(output, r)
	}
	return output
}
// SLOW: Unbuffered causes blocking
ch := make(chan int)
// FAST: Buffer reduces contention
ch := make(chan int, 100)
// BAD: Global lock
var mu sync.Mutex
var cache = make(map[string]string)
func Get(key string) string {
mu.Lock()
defer mu.Unlock()
return cache[key]
}
// GOOD: sharded locks — 256 independently locked shards selected by a
// hash of the key, so unrelated keys do not contend on a single mutex.
//
// NOTE(review): the per-shard items maps are never initialized in this
// snippet. Reads on a nil map return the zero value, but any write
// would panic — a constructor must make() each map; confirm against
// the full example.
type ShardedCache struct {
	shards [256]struct {
		mu    sync.RWMutex
		items map[string]string
	}
}

// getShard maps key onto one of the 256 shards via an FNV-1a hash.
func (c *ShardedCache) getShard(key string) *struct {
	mu    sync.RWMutex
	items map[string]string
} {
	h := fnv.New32a()
	h.Write([]byte(key))
	return &c.shards[h.Sum32()%256]
}

// Get returns the value stored under key ("" if absent), holding only
// that key's shard read lock.
func (c *ShardedCache) Get(key string) string {
	shard := c.getShard(key)
	shard.mu.RLock()
	defer shard.mu.RUnlock()
	return shard.items[key]
}
// Package-level cache. sync.Map suits keys written once and read many
// times, or goroutines operating on disjoint key sets.
var cache sync.Map

// Get reports the cached value for key and whether it was present.
func Get(key string) (string, bool) {
	if v, ok := cache.Load(key); ok {
		return v.(string), true
	}
	return "", false
}

// Set stores value under key.
func Set(key, value string) {
	cache.Store(key, value)
}
// BAD: 24 bytes (padding)
type Bad struct {
a bool // 1 byte + 7 padding
b int64 // 8 bytes
c bool // 1 byte + 7 padding
}
// GOOD: 16 bytes total — ordering the largest field first leaves only
// 6 bytes of trailing padding (the original comment claimed "no
// padding", which contradicts the field comments below).
type Good struct {
	b int64 // 8 bytes
	a bool  // 1 byte
	c bool  // 1 byte + 6 trailing padding
}
// SLOW: Type assertions, boxing
func Sum(values []interface{}) float64 {
var sum float64
for _, v := range values {
sum += v.(float64)
}
return sum
}
// FAST: concrete element type — no interface boxing and no per-element
// type assertion.
func Sum(values []float64) float64 {
	var total float64
	for i := 0; i < len(values); i++ {
		total += values[i]
	}
	return total
}
func BenchmarkProcess(b *testing.B) {
data := generateTestData()
b.ResetTimer() // Exclude setup time
for i := 0; i < b.N; i++ {
Process(data)
}
}
// Memory benchmarks
func BenchmarkAllocs(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
_ = make([]byte, 1024)
}
}
// Compare implementations
func BenchmarkComparison(b *testing.B) {
b.Run("old", func(b *testing.B) {
for i := 0; i < b.N; i++ {
OldImplementation()
}
})
b.Run("new", func(b *testing.B) {
for i := 0; i < b.N; i++ {
NewImplementation()
}
})
}
Run with:
go test -bench=. -benchmem ./...
go test -bench=. -benchtime=5s ./... # Longer runs
// BAD: Defer overhead per iteration
for _, item := range items {
mu.Lock()
defer mu.Unlock() // Defers stack up!
process(item)
}
// GOOD: Explicit unlock
for _, item := range items {
mu.Lock()
process(item)
mu.Unlock()
}
// BETTER: Extract to function
for _, item := range items {
processWithLock(item)
}
func processWithLock(item Item) {
mu.Lock()
defer mu.Unlock()
process(item)
}
// SLOW: Reflection on every call
json.Marshal(v)
// FAST: Reuse encoder
var buf bytes.Buffer
encoder := json.NewEncoder(&buf)
encoder.Encode(v)
// FASTER: Code generation (easyjson, ffjson)
Use make([]T, 0, capacity) when the size is known; use sync.Pool for buffers.
Weekly Installs
52
Repository
GitHub Stars
4
First Seen
Jan 24, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
opencode45
codex41
gemini-cli41
github-copilot40
kimi-cli35
amp35
React 组合模式指南:Vercel 组件架构最佳实践,提升代码可维护性
125,600 周安装