Python Performance by pluginagentmarketplace/custom-plugin-python
npx skills add https://github.com/pluginagentmarketplace/custom-plugin-python --skill 'Python Performance'

掌握 Python 性能优化。学习如何分析代码、识别瓶颈、优化算法、高效管理内存,并利用高性能库处理计算密集型任务。
代码示例:
import timeit
import cProfile
import pstats
# 1. timeit 用于微基准测试
def list_comprehension():
    """Return the squares of 0..999, built with a list comprehension."""
    return [value * value for value in range(1000)]
def map_function():
    """Return the squares of 0..999, produced via map() with a lambda."""
    squared = map(lambda value: value ** 2, range(1000))
    return list(squared)
# Compare the two approaches over 10,000 runs each with timeit
time_lc = timeit.timeit(list_comprehension, number=10000)
time_map = timeit.timeit(map_function, number=10000)
print(f"List comprehension: {time_lc:.4f}s")
print(f"Map function: {time_map:.4f}s")
# 2. cProfile 用于函数性能分析
def process_data():
    """Build a list of the first 100,000 squares and return their sum.

    Deliberately uses an explicit append loop so the work shows up
    clearly in a cProfile report.
    """
    squares = []
    for n in range(100000):
        squares.append(n * n)
    return sum(squares)
# Profile process_data() and print the ten most expensive entries
profiler = cProfile.Profile()
profiler.enable()
result = process_data()
profiler.disable()
stats = pstats.Stats(profiler)
stats.sort_stats('cumulative')  # sort by cumulative time, including callees
stats.print_stats(10)
# 3. 行分析(需要 line_profiler 包)
# @profile 装饰器(为 line_profiler 手动添加)
def slow_function():
    """Sum the squares of 0..999,999 with a plain Python loop.

    Intentionally unoptimized so line_profiler has something to report.
    """
    accumulator = 0
    for value in range(1000000):
        accumulator += value * value
    return accumulator
# 运行命令:kernprof -l -v script.py
# 4. 内存分析
from memory_profiler import profile
@profile
def memory_intensive():
    """Allocate a large list and dict so memory_profiler can show per-line usage."""
    large_list = [i for i in range(1000000)]
    large_dict = {i: i**2 for i in range(1000000)}
    return len(large_list) + len(large_dict)
# 运行命令:python -m memory_profiler script.py
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
代码示例:
import bisect
from collections import deque, Counter, defaultdict
import time
# 1. 成员测试:列表 vs 集合
# 差:O(n) 查找
def find_in_list(items, target):
    """Return True if *target* occurs in *items* — a linear O(n) scan."""
    return any(element == target for element in items)
# 好:O(1) 查找
def find_in_set(items, target):
    """Return True if *target* is in *items*, using an O(1) hash lookup.

    Note: converting *items* to a set is itself O(n), so the speed-up only
    materializes when the caller reuses one set across many lookups — or
    passes a set directly.  The conversion is skipped when *items* is
    already set-like, so repeated calls with a prebuilt set stay O(1).
    """
    if not isinstance(items, (set, frozenset)):
        items = set(items)
    return target in items
items = list(range(100000))
# List: ~0.001s per lookup, set: ~0.000001s (roughly 1000x faster per lookup,
# once the set has been built)
# 2. Generator expressions for memory efficiency
# Bad: materializes the entire list in memory
squares_list = [x**2 for x in range(1000000)]  # ~4MB
# Good: yields values on demand
squares_gen = (x**2 for x in range(1000000))  # ~128 bytes
# 3. deque for efficient queue operations
# Bad: popping from the front of a list is O(n)
queue_list = list(range(10000))
queue_list.pop(0)  # slow
# Good: O(1) pops at either end
queue_deque = deque(range(10000))
queue_deque.popleft()  # fast
# 4. bisect for maintaining a sorted list
# Bad: re-sorting the whole list after every insertion
sorted_list = []
for i in [5, 2, 8, 1, 9]:
    sorted_list.append(i)
    sorted_list.sort()
# Good: insort keeps the list sorted as it grows
# (the binary search is O(log n); the underlying element shift is still O(n))
sorted_list = []
for i in [5, 2, 8, 1, 9]:
    bisect.insort(sorted_list, i)
# 5. Counter for frequency counting
# Bad: manual counting with a plain dict
# NOTE(review): `words` is not defined anywhere in this snippet — presumably
# an iterable of strings supplied by the surrounding program; confirm.
word_count = {}
for word in words:
    if word in word_count:
        word_count[word] += 1
    else:
        word_count[word] = 1
# Good: Counter does the same in one call
word_count = Counter(words)
most_common = word_count.most_common(10)
代码示例:
import gc
import sys
from weakref import WeakValueDictionary
# 1. __slots__ 用于内存高效的类
# 差:普通类(每个实例 56 字节)
class RegularPoint:
    """2-D point stored via a per-instance __dict__ — the memory-hungry baseline."""
    def __init__(self, x, y):
        self.x = x
        self.y = y
# 好:使用 Slots 的类(每个实例 32 字节 - 小 43%!)
class SlottedPoint:
    """2-D point using __slots__: no per-instance __dict__, so each instance
    is smaller and attribute access is slightly faster."""
    __slots__ = ['x', 'y']
    def __init__(self, x, y):
        self.x = x
        self.y = y
print(sys.getsizeof(RegularPoint(1, 2))) # 56 字节
print(sys.getsizeof(SlottedPoint(1, 2))) # 32 字节
# 2. 对象池用于昂贵的对象
class ObjectPool:
    """A bounded pool that recycles expensive-to-build objects.

    Objects are created lazily via *factory*; released objects are
    retained for reuse until *max_size* is reached.
    """

    def __init__(self, factory, max_size=10):
        self.factory = factory
        self.max_size = max_size
        self.pool = []

    def acquire(self):
        """Return a recycled object if one is pooled, else build a new one."""
        try:
            return self.pool.pop()
        except IndexError:
            return self.factory()

    def release(self, obj):
        """Return *obj* to the pool; silently dropped when the pool is full."""
        if len(self.pool) < self.max_size:
            self.pool.append(obj)
# 用法
# Usage
# NOTE(review): DatabaseConnection is not defined in this snippet —
# presumably a project class; confirm against the surrounding code.
db_pool = ObjectPool(lambda: DatabaseConnection(), max_size=5)
conn = db_pool.acquire()
# ... use the connection ...
db_pool.release(conn)
# 3. 弱引用防止内存泄漏
class Cache:
    """Cache whose entries disappear automatically once no other strong
    reference keeps the cached value alive (backed by WeakValueDictionary).

    NOTE(review): values must support weak references; built-ins such as
    int, str and tuple do not, and storing them raises TypeError — confirm
    the intended value types.
    """
    def __init__(self):
        self._cache = WeakValueDictionary()
    def get(self, key):
        # Returns None when the key is absent or its value was collected.
        return self._cache.get(key)
    def set(self, key, value):
        self._cache[key] = value
# 4. 大型操作的手动垃圾回收
def process_large_dataset():
    """Process the global `large_data` batch by batch, forcing a GC pass per batch.

    NOTE(review): `large_data` and `process_batch` are not defined in this
    snippet — presumably supplied by the surrounding program; confirm.
    """
    for batch in large_data:
        process_batch(batch)
        # Force garbage collection after each batch to cap peak memory
        gc.collect()
# 5. 用于资源清理的上下文管理器
class ManagedResource:
    """Context manager: allocates a resource on entry, cleans it up on exit."""
    def __enter__(self):
        # NOTE(review): `allocate_resource` is defined elsewhere — presumably
        # returns an object exposing cleanup(); confirm against the caller.
        self.resource = allocate_resource()
        return self.resource
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.resource.cleanup()
        # Returning False propagates any exception raised inside the block.
        return False
代码示例:
import numpy as np
from numba import jit
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
# 1. NumPy 向量化
# 差:Python 循环(慢)
def python_sum(n):
    """Sum of squares 0..n-1 using a pure-Python accumulation loop."""
    accumulator = 0
    for value in range(n):
        accumulator += value * value
    return accumulator
# 好:NumPy 向量化(快 100 倍!)
def numpy_sum(n):
    """Sum of squares 0..n-1 via a vectorized NumPy expression.

    Uses an explicit int64 dtype: np.arange defaults to the platform
    integer, which is 32-bit on some platforms (e.g. Windows), so
    ``arr ** 2`` would silently overflow for n above ~46341.  Returns a
    plain Python int (backward-compatible with the previous np.int64).
    """
    arr = np.arange(n, dtype=np.int64)
    return int(np.sum(arr * arr))
# 基准测试:python_sum(1000000) = 0.15s
# numpy_sum(1000000) = 0.002s
# 2. Numba JIT 编译
@jit(nopython=True)  # compile to native machine code; no Python-object fallback
def fast_function(n):
    """Sum of squares 0..n-1; the loop is JIT-compiled by Numba on first call."""
    total = 0
    for i in range(n):
        total += i ** 2
    return total
# 第一次调用:编译 + 执行
# 后续调用:比纯 Python 快 50 倍!
# 3. 多进程处理 CPU 密集型任务
def cpu_intensive_task(n):
    """Sum of squares 0..n-1 — pure CPU work for the multiprocessing demo."""
    total = 0
    for value in range(n):
        total += value * value
    return total
# Single process
result = cpu_intensive_task(10000000)
# Multiple processes
with ProcessPoolExecutor(max_workers=4) as executor:
    ranges = [2500000, 2500000, 2500000, 2500000]
    results = executor.map(cpu_intensive_task, ranges)
    total = sum(results)
# 4x speedup on 4 cores!
# NOTE(review): `total` here is 4 * sum(i*i for i in range(2500000)), which is
# NOT equal to `result` — cpu_intensive_task always starts at 0, so these four
# chunks do not partition range(10000000).  The single- vs multi-process
# comparison is therefore not apples-to-apples; verify intent.
# 4. 缓存昂贵的计算
from functools import lru_cache
@lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number; memoization makes the recursion linear."""
    if n >= 2:
        return fibonacci(n - 2) + fibonacci(n - 1)
    return n
# fibonacci(100) 无缓存:~永远
# fibonacci(100) 有缓存:瞬间
# 5. 用于零拷贝操作的内存视图
def process_array(data):
    """Return a zero-copy memoryview of data[1000:2000].

    Slicing *data* directly (``data[1000:2000]``) would allocate and copy
    1000 elements; a memoryview slice shares the underlying buffer instead.
    The original body also computed that copying slice and discarded it —
    removed.  Returning the view (the original returned None) is
    backward-compatible for callers that ignored the result.
    """
    return memoryview(data)[1000:2000]
构建一个全面的性能分析工具。
要求:
关键技能: 分析工具、可视化、分析
优化数据处理管道。
要求:
关键技能: NumPy、内存优化、基准测试
实现并行算法。
要求:
关键技能: 并行处理、性能测量
掌握 Python 性能优化后,可以探索:
每周安装次数
–
代码仓库
GitHub 星标数
5
首次出现
–
安全审计
Master performance optimization in Python. Learn to profile code, identify bottlenecks, optimize algorithms, manage memory efficiently, and leverage high-performance libraries for compute-intensive tasks.
Code Example:
import timeit
import cProfile
import pstats
# 1. timeit for micro-benchmarks
def list_comprehension():
    """Return the squares of 0..999, built with a list comprehension."""
    return [x**2 for x in range(1000)]
def map_function():
    """Return the squares of 0..999, produced via map() with a lambda."""
    return list(map(lambda x: x**2, range(1000)))
# Compare performance
time_lc = timeit.timeit(list_comprehension, number=10000)
time_map = timeit.timeit(map_function, number=10000)
print(f"List comprehension: {time_lc:.4f}s")
print(f"Map function: {time_map:.4f}s")
# 2. cProfile for function profiling
def process_data():
    """Append 100,000 squares to a list and return their sum (cProfile demo)."""
    data = []
    for i in range(100000):
        data.append(i ** 2)
    return sum(data)
profiler = cProfile.Profile()
profiler.enable()
result = process_data()
profiler.disable()
stats = pstats.Stats(profiler)
stats.sort_stats('cumulative')
stats.print_stats(10)
# 3. Line profiling (requires line_profiler package)
# @profile decorator (add manually for line_profiler)
def slow_function():
    """Sum the squares of 0..999,999 — intentionally slow, for line_profiler demos."""
    total = 0
    for i in range(1000000):
        total += i ** 2
    return total
# Run with: kernprof -l -v script.py
# 4. Memory profiling
from memory_profiler import profile
@profile
def memory_intensive():
large_list = [i for i in range(1000000)]
large_dict = {i: i**2 for i in range(1000000)}
return len(large_list) + len(large_dict)
# Run with: python -m memory_profiler script.py
Code Example:
import bisect
from collections import deque, Counter, defaultdict
import time
# 1. List vs Set for membership testing
# Bad: O(n) lookup
def find_in_list(items, target):
    """Return True if *target* occurs in *items* — O(n) scan for a list."""
    return target in items  # Linear search
# Good: O(1) lookup
def find_in_set(items, target):
    """Return True if *target* is in *items*, using an O(1) hash lookup.

    Note: converting *items* to a set is itself O(n), so the speed-up only
    materializes when the caller reuses one set across many lookups — or
    passes a set directly.  The conversion is skipped when *items* is
    already set-like, so repeated calls with a prebuilt set stay O(1).
    """
    if not isinstance(items, (set, frozenset)):
        items = set(items)
    return target in items
items = list(range(100000))
# List: ~0.001s per lookup, set: ~0.000001s (1000x faster — provided the set is built once and reused across lookups)
# 2. Generator expressions for memory efficiency
# Bad: Creates entire list in memory
squares_list = [x**2 for x in range(1000000)] # ~4MB
# Good: Generates on-demand
squares_gen = (x**2 for x in range(1000000)) # ~128 bytes
# 3. Deque for efficient queue operations
# Bad: O(n) pop from beginning
queue_list = list(range(10000))
queue_list.pop(0) # Slow
# Good: O(1) pop from both ends
queue_deque = deque(range(10000))
queue_deque.popleft() # Fast
# 4. Bisect for maintaining sorted lists
# Bad: O(n) insertion into sorted list
sorted_list = []
for i in [5, 2, 8, 1, 9]:
sorted_list.append(i)
sorted_list.sort()
# Good: O(log n) insertion
sorted_list = []
for i in [5, 2, 8, 1, 9]:
bisect.insort(sorted_list, i)
# 5. Counter for frequency counting
# Bad: Manual counting
word_count = {}
for word in words:
if word in word_count:
word_count[word] += 1
else:
word_count[word] = 1
# Good: Counter
word_count = Counter(words)
most_common = word_count.most_common(10)
Code Example:
import gc
import sys
from weakref import WeakValueDictionary
# 1. __slots__ for memory-efficient classes
# Bad: Regular class (56 bytes per instance)
class RegularPoint:
    """2-D point stored via a per-instance __dict__ — the memory-hungry baseline."""
    def __init__(self, x, y):
        self.x = x
        self.y = y
# Good: Slots class (32 bytes per instance - 43% smaller!)
class SlottedPoint:
    """2-D point using __slots__: no per-instance __dict__, so each instance
    is smaller and attribute access is slightly faster."""
    __slots__ = ['x', 'y']
    def __init__(self, x, y):
        self.x = x
        self.y = y
print(sys.getsizeof(RegularPoint(1, 2))) # 56 bytes
print(sys.getsizeof(SlottedPoint(1, 2))) # 32 bytes
# 2. Object pooling for expensive objects
class ObjectPool:
    """A bounded pool that recycles objects created by *factory*."""
    def __init__(self, factory, max_size=10):
        # factory: zero-argument callable producing a fresh object.
        self.factory = factory
        self.max_size = max_size
        self.pool = []
    def acquire(self):
        """Return a recycled object if one is pooled, else build a new one."""
        if self.pool:
            return self.pool.pop()
        return self.factory()
    def release(self, obj):
        """Return *obj* to the pool; silently dropped when the pool is full."""
        if len(self.pool) < self.max_size:
            self.pool.append(obj)
# Usage
db_pool = ObjectPool(lambda: DatabaseConnection(), max_size=5)
conn = db_pool.acquire()
# Use connection
db_pool.release(conn)
# 3. Weak references to prevent memory leaks
class Cache:
    """Cache whose entries disappear once no other strong reference keeps
    the cached value alive (backed by WeakValueDictionary).

    NOTE(review): values must support weak references; built-ins such as
    int, str and tuple do not, and storing them raises TypeError — confirm
    the intended value types.
    """
    def __init__(self):
        self._cache = WeakValueDictionary()
    def get(self, key):
        # Returns None when the key is absent or its value was collected.
        return self._cache.get(key)
    def set(self, key, value):
        self._cache[key] = value
# 4. Manual garbage collection for large operations
def process_large_dataset():
    """Process the global `large_data` batch by batch, forcing a GC pass per batch.

    NOTE(review): `large_data` and `process_batch` are not defined in this
    snippet — presumably supplied by the surrounding program; confirm.
    """
    for batch in large_data:
        process_batch(batch)
        # Force garbage collection after each batch to cap peak memory
        gc.collect()
# 5. Context managers for resource cleanup
class ManagedResource:
    """Context manager: allocates a resource on entry, cleans it up on exit."""
    def __enter__(self):
        # NOTE(review): `allocate_resource` is defined elsewhere — presumably
        # returns an object exposing cleanup(); confirm against the caller.
        self.resource = allocate_resource()
        return self.resource
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.resource.cleanup()
        # Returning False propagates any exception raised inside the block.
        return False
Code Example:
import numpy as np
from numba import jit
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
# 1. NumPy vectorization
# Bad: Python loops (slow)
def python_sum(n):
    """Sum of squares 0..n-1 using a pure-Python accumulation loop."""
    total = 0
    for i in range(n):
        total += i ** 2
    return total
# Good: NumPy vectorization (100x faster!)
def numpy_sum(n):
    """Sum of squares 0..n-1 via a vectorized NumPy expression.

    Uses an explicit int64 dtype: np.arange defaults to the platform
    integer, which is 32-bit on some platforms (e.g. Windows), so
    ``arr ** 2`` would silently overflow for n above ~46341.  Returns a
    plain Python int (backward-compatible with the previous np.int64).
    """
    arr = np.arange(n, dtype=np.int64)
    return int(np.sum(arr * arr))
# Benchmark: python_sum(1000000) = 0.15s
# numpy_sum(1000000) = 0.002s
# 2. Numba JIT compilation
@jit(nopython=True)  # Compile to machine code
def fast_function(n):
    """Sum of squares 0..n-1; the loop is JIT-compiled by Numba on first call."""
    total = 0
    for i in range(n):
        total += i ** 2
    return total
# First call: compilation + execution
# Subsequent calls: 50x faster than pure Python!
# 3. Multiprocessing for CPU-bound tasks
def cpu_intensive_task(n):
    """Sum of squares 0..n-1 — pure CPU work for the multiprocessing demo."""
    return sum(i * i for i in range(n))
# Single process
result = cpu_intensive_task(10000000)
# Multiple processes
with ProcessPoolExecutor(max_workers=4) as executor:
    ranges = [2500000, 2500000, 2500000, 2500000]
    results = executor.map(cpu_intensive_task, ranges)
    total = sum(results)
# 4x speedup on 4 cores!
# NOTE(review): `total` here is 4 * sum(i*i for i in range(2500000)), which is
# NOT equal to `result` — cpu_intensive_task always starts at 0, so these four
# chunks do not partition range(10000000).  The single- vs multi-process
# comparison is therefore not apples-to-apples; verify intent.
# 4. Caching for expensive computations
from functools import lru_cache
@lru_cache(maxsize=128)
def fibonacci(n):
    """Return the n-th Fibonacci number; lru_cache makes the recursion linear."""
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
# fibonacci(100) without cache: ~forever
# fibonacci(100) with cache: instant
# 5. Memory views for zero-copy operations
def process_array(data):
    """Return a zero-copy memoryview of data[1000:2000].

    Slicing *data* directly (``data[1000:2000]``) would allocate and copy
    1000 elements; a memoryview slice shares the underlying buffer instead.
    The original body also computed that copying slice and discarded it —
    removed.  Returning the view (the original returned None) is
    backward-compatible for callers that ignored the result.
    """
    return memoryview(data)[1000:2000]
Build a comprehensive profiling tool.
Requirements:
Key Skills: Profiling tools, visualization, analysis
Optimize data processing pipeline.
Requirements:
Key Skills: NumPy, memory optimization, benchmarking
Implement parallel algorithms.
Requirements:
Key Skills: Parallelism, performance measurement
After mastering Python performance, explore:
Weekly Installs
–
Repository
GitHub Stars
5
First Seen
–
Security Audits
agent-browser 浏览器自动化工具 - Vercel Labs 命令行网页操作与测试
147,400 周安装