recommendation-engine by secondsky/claude-skills
npx skills add https://github.com/secondsky/claude-skills --skill recommendation-engine
构建推荐系统,用于个性化内容和产品推荐。
| 方法 | 工作原理 | 优点 | 缺点 |
|---|---|---|---|
| 协同过滤 | 用户-物品交互 | 发现隐藏模式 | 冷启动问题 |
| 基于内容 | 物品特征 | 适用于新物品 | 发现能力有限 |
| 混合方法 | 结合两者 | 兼具两者优点 | 复杂 |
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
class CollaborativeFilter:
    """Memory-based collaborative filter built on cosine similarity.

    ``fit`` computes user-user and item-item cosine similarities and keeps
    the interaction matrix so that ``recommend_for_user`` can score items
    and mask the ones the user has already interacted with.
    """

    def __init__(self):
        self.user_similarity = None
        self.item_similarity = None
        self.user_item_matrix = None

    def fit(self, user_item_matrix):
        """Compute similarities from *user_item_matrix*.

        Args:
            user_item_matrix: 2-D dense array or scipy sparse matrix;
                assumed rows = users, columns = items.
        """
        # Keep the interactions: recommend_for_user needs them both for
        # scoring and for masking already-seen items.
        self.user_item_matrix = user_item_matrix
        # User-based similarity
        self.user_similarity = cosine_similarity(user_item_matrix)
        # Item-based similarity
        self.item_similarity = cosine_similarity(user_item_matrix.T)

    def recommend_for_user(self, user_id, n=10):
        """Return up to *n* item indices for *user_id*, best first.

        Items the user already interacted with are never returned, so the
        result may be shorter than *n*.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.user_item_matrix is None or self.user_similarity is None:
            raise RuntimeError("call fit() before recommend_for_user()")

        similarity_row = np.asarray(self.user_similarity[user_id]).ravel()
        # matrix.T.dot(vector) yields a plain 1-D result for both dense
        # arrays and scipy sparse matrices (ndarray.dot(sparse) does not).
        scores = np.asarray(
            self.user_item_matrix.T.dot(similarity_row), dtype=float
        ).ravel()

        # nonzero() returns (cols,) for a dense 1-D row and (rows, cols) for
        # a sparse 2-D row; the last entry is the item index in both cases.
        already_interacted = self.user_item_matrix[user_id].nonzero()[-1]
        scores[already_interacted] = -np.inf

        ranked = np.argsort(scores)[::-1]
        # Drop masked items so they cannot pad the list when n is large.
        ranked = ranked[np.isfinite(scores[ranked])]
        return ranked[:max(n, 0)]
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
from sklearn.decomposition import TruncatedSVD
class MatrixFactorization:
    """Latent-factor model backed by scikit-learn's ``TruncatedSVD``."""

    def __init__(self, n_factors=50):
        """Create the underlying SVD with *n_factors* components."""
        self.svd = TruncatedSVD(n_components=n_factors)

    def fit(self, user_item_matrix):
        """Factorize *user_item_matrix* and store both factor matrices."""
        row_factors = self.svd.fit_transform(user_item_matrix)
        self.user_factors = row_factors
        # components_ is (n_components, n_columns); transpose it so that
        # each matrix column gets its own factor row.
        self.item_factors = self.svd.components_.T

    def predict(self, user_id, item_id):
        """Return the dot product of the user's and the item's factors."""
        user_vector = self.user_factors[user_id]
        item_vector = self.item_factors[item_id]
        return np.dot(user_vector, item_vector)
class HybridRecommender:
    """Blend collaborative and content-based scores with fixed weights.

    NOTE(review): both components must expose ``score(user_id)`` returning
    one score per item. ``ContentBasedFilter`` is not defined in this
    snippet, and the ``CollaborativeFilter`` shown alongside it has no
    ``score`` method - confirm which implementations are intended.
    """

    def __init__(self, collab_weight=0.7, content_weight=0.3):
        self.collab = CollaborativeFilter()
        self.content = ContentBasedFilter()
        self.weights = (collab_weight, content_weight)

    def recommend(self, user_id, n=10):
        """Return the indices of the top *n* items by weighted score.

        Returns an empty array when *n* is zero or negative.
        """
        collab_weight, content_weight = self.weights
        # asarray lets plain Python lists be scaled and added element-wise.
        collab_scores = np.asarray(self.collab.score(user_id), dtype=float)
        content_scores = np.asarray(self.content.score(user_id), dtype=float)
        combined = collab_weight * collab_scores + content_weight * content_scores
        # Reverse-then-slice instead of [-n:]: a slice starting at -0 would
        # return every item when n == 0.
        return np.argsort(combined)[::-1][:max(n, 0)]
from scipy.sparse import csr_matrix
import numpy as np
# 1. Prepare the user-item interaction matrix
# rows = users, cols = items, values = ratings/interactions
# Each entry is (user, item, rating). Append your own tuples here; a literal
# `...` placeholder inside the list would break the unpacking below.
ratings_data = [(0, 5, 5), (0, 10, 4), (1, 5, 3)]
n_users, n_items = 1000, 5000
row_idx = [r[0] for r in ratings_data]
col_idx = [r[1] for r in ratings_data]
ratings = [r[2] for r in ratings_data]
user_item_matrix = csr_matrix((ratings, (row_idx, col_idx)), shape=(n_users, n_items))
# 2. Choose and train a model
from recommendation_engine import ItemBasedCollaborativeFilter # See references
model = ItemBasedCollaborativeFilter(similarity_metric='cosine', k_neighbors=20)
model.fit(user_item_matrix)
# 3. Generate recommendations
recommendations = model.recommend(user_id=42, n=10)
print(recommendations) # [(item_id, score), ...]
# 4. Evaluate on a test set
from evaluation_metrics import precision_at_k, recall_at_k
test_items = {42: {10, 25, 30}} # True relevant items for user 42
rec_items = [item for item, score in recommendations]
precision = precision_at_k(rec_items, test_items[42], k=10)
recall = recall_at_k(rec_items, test_items[42], k=10)
print(f"Precision@10: {precision:.3f}, Recall@10: {recall:.3f}")
# 5. Handle cold start
from cold_start import PopularityRecommender
popularity_model = PopularityRecommender()
# NOTE(review): interactions_with_timestamps is not defined in this snippet;
# supply your own interaction log before running this step.
popularity_model.fit(interactions_with_timestamps)
new_user_recs = popularity_model.recommend(n=10)
问题 : 只推荐热门物品,忽略长尾物品。降低多样性和意外发现性。
解决方案 : 平衡流行度与个性化,应用重排序以提高多样性:
def diversify_recommendations(
    recommendations: "List[Tuple[int, float]]",
    item_features: np.ndarray,
    diversity_weight: float = 0.3,
) -> "List[Tuple[int, float]]":
    """Greedily re-rank to increase diversity while maintaining relevance.

    The most relevant item is taken first. Each later pick maximizes
    ``(1 - diversity_weight) * relevance + diversity_weight * d``, where
    ``d`` is the mean cosine distance to the items already selected.

    Args:
        recommendations: ``(item_index, relevance)`` pairs; item indices
            must be valid row indices into *item_features*.
        item_features: 2-D array with one feature row per item.
        diversity_weight: 0 keeps pure relevance order; larger values
            favor items unlike those already chosen.

    Returns:
        The same pairs, re-ordered. An empty input gives an empty list.
    """
    # Annotations are quoted so the def does not need typing.List/Tuple
    # to be in scope.
    if not recommendations:
        return []

    features = np.asarray(item_features, dtype=float)
    vectors = features[[item for item, _ in recommendations]]

    # Pairwise cosine distances among the candidates, computed once.
    # All-zero rows keep norm 1, so they sit at distance 1 from everything.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    unit = vectors / norms
    distances = np.clip(1.0 - unit @ unit.T, 0.0, 2.0)

    relevance = np.array([score for _, score in recommendations], dtype=float)
    remaining = list(range(len(recommendations)))

    # First item: highest score, without assuming the input is sorted.
    first = max(remaining, key=lambda idx: relevance[idx])
    remaining.remove(first)
    chosen = [first]

    while remaining:
        # Average distance from every remaining candidate to the chosen set.
        avg_distance = distances[np.ix_(remaining, chosen)].mean(axis=1)
        combined = (
            (1 - diversity_weight) * relevance[remaining]
            + diversity_weight * avg_distance
        )
        # argmax returns the first maximum, so ties keep the input order.
        chosen.append(remaining.pop(int(np.argmax(combined))))

    return [recommendations[idx] for idx in chosen]
问题 : 当大多数用户对 <1% 的物品进行过评分时,协同过滤会失效。
解决方案 : 使用矩阵分解 (SVD, ALS) 替代基于内存的协同过滤:
# ❌ Bad: User-based CF on sparse data (fails to find similar users)
# NOTE(review): UserBasedCollaborativeFilter and sparse_matrix are not defined
# in this snippet; presumably sparse_matrix is a users x items matrix - confirm.
user_cf = UserBasedCollaborativeFilter()
user_cf.fit(sparse_matrix) # Most users have <10 ratings
# ✅ Good: Matrix factorization handles sparsity
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=50)
# fit_transform returns one 50-component row per row of sparse_matrix.
user_factors = svd.fit_transform(sparse_matrix)
# components_ holds one row per component; transposing gives one row per
# column of sparse_matrix.
item_factors = svd.components_.T
# Predict rating: user_factors[u] @ item_factors[i]
问题 : 对于新用户/新物品,推荐器崩溃或返回空结果。
解决方案 : 始终实现备用链:
def _has_results(recs):
    """Return True when *recs* holds at least one recommendation."""
    if recs is None:
        return False
    try:
        return len(recs) > 0
    except TypeError:
        # Unsized iterables (e.g. generators) cannot be checked cheaply.
        return True


def recommend_with_fallback(user_id, n=10):
    """Graceful degradation through fallback chain.

    Tries personalized collaborative filtering, then demographic
    recommendations, then popularity. A tier is skipped when it raises or
    returns nothing, so only the popularity tier can yield an empty result.
    """
    try:
        # Try personalized recommendations
        if has_sufficient_history(user_id, min_interactions=5):
            recs = collaborative_filter.recommend(user_id, n)
            # An empty answer is as useless as a crash: keep falling back.
            if _has_results(recs):
                return recs
    except Exception as e:
        # Deliberately broad: any failure in this tier must degrade, not crash.
        logger.warning(f"CF failed for user {user_id}: {e}")
    # Fallback 1: Demographic-based
    try:
        if user_demographics_available(user_id):
            recs = demographic_recommender.recommend(user_id, n)
            if _has_results(recs):
                return recs
    except Exception as e:
        logger.warning(f"Demographic fallback failed for user {user_id}: {e}")
    # Fallback 2: Popularity
    return popularity_recommender.recommend(n)
问题 : 推荐用户已购买/查看过的物品会浪费推荐位。
解决方案 : 始终过滤已交互物品:
# ✅ Correct: Exclude interacted items
user_items = user_item_matrix[user_id].nonzero()[1]
scores[user_items] = -np.inf # Ensure they don't appear in top-K
recommendations = np.argsort(scores)[-n:][::-1]
# ❌ Wrong: Forgetting to filter
recommendations = np.argsort(scores)[-n:][::-1] # May include already purchased!
问题 : 将所有点击/查看视为同等重要。1 次查看 ≠ 100 次查看。
解决方案 : 根据交互强度加权 (查看次数、观看时长等):
# For implicit feedback, use confidence weighting
confidence_matrix = 1 + alpha * np.log(1 + interaction_counts)
# In ALS: C_ui * (P_ui - X_ui)²
# Higher confidence for items with more interactions
问题 : 高预测准确率 (RMSE) 并不意味着好的 top-K 推荐。
解决方案 : 使用排序指标 (NDCG, MAP@K):
# ❌ Bad: Only RMSE
# NOTE(review): y_true, y_pred, recommendations, relevance_scores,
# all_recommendations and ground_truth are not defined in this snippet;
# supply them from your own evaluation split.
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
# ✅ Good: Ranking metrics for top-K evaluation
# NOTE(review): the evaluation_metrics module is not shown here; confirm the
# import path and the signatures of the helpers below.
from evaluation_metrics import ndcg_at_k, mean_average_precision_at_k
# NDCG rewards putting highly relevant items first
ndcg = ndcg_at_k(recommendations, relevance_scores, k=10)
# MAP@K considers precision at each relevant item position
map_score = mean_average_precision_at_k(all_recommendations, ground_truth, k=10)
问题 : 总是推荐相似物品会限制发现性,长期来看会降低用户参与度。
解决方案 : 实施探索-利用策略:
class ExploreExploitRecommender:
    """Epsilon-style mix of model recommendations and exploratory items.

    NOTE(review): ``sample_diverse_items`` is not defined in this snippet;
    it is called with the number of exploratory items wanted and is
    expected to return an iterable of recommendations.
    """

    def __init__(self, base_model, epsilon=0.1):
        self.base_model = base_model
        self.epsilon = epsilon  # share of slots used for exploration

    def recommend(self, user_id, n=10):
        """Return a list of up to *n* recommendations.

        About ``n * (1 - epsilon)`` slots come from ``base_model``; the
        rest come from ``sample_diverse_items``. Returns ``[]`` when *n* is
        not positive.
        """
        if n <= 0:
            return []

        # Round before truncating: 10 * (1 - 0.9) is 0.9999999999999998 in
        # floating point and would otherwise truncate to 0 instead of 1.
        n_exploit = int(round(n * (1 - self.epsilon), 9))
        # Clamp so an out-of-range epsilon cannot produce negative counts.
        n_exploit = min(max(n_exploit, 0), n)
        n_explore = n - n_exploit

        results = []
        # Exploit: Use trained model for most recommendations
        if n_exploit > 0:
            results.extend(self.base_model.recommend(user_id, n=n_exploit))
        # Explore: Add random diverse items
        if n_explore > 0:
            results.extend(sample_diverse_items(n_explore))
        # extend() concatenates; `+` on two numpy arrays would add them
        # element-wise instead.
        return results
需要详细实现时加载参考文件:
协同过滤 : 加载 references/collaborative-filtering-deep-dive.md 以获取完整的基于用户和基于物品的协同过滤实现,包括相似度度量 (余弦、皮尔逊、杰卡德)、可扩展性优化 (稀疏矩阵、近似最近邻) 以及处理边缘情况 (冷启动、稀疏性)
矩阵分解 : 加载 references/matrix-factorization-methods.md 以获取 SVD、ALS 和 NMF 实现,包括超参数调优、隐式反馈处理以及高级技术 (BPR, WARP)
评估指标 : 加载 references/evaluation-metrics-implementation.md 以获取 Precision@K、Recall@K、NDCG、覆盖率、多样性指标、交叉验证策略以及统计显著性检验 (配对 t 检验、自助法置信区间)
冷启动解决方案 : 加载 references/cold-start-strategies.md 以获取新用户/新物品策略 (基于流行度、引导、人口统计、基于内容的引导、主动学习)、探索-利用方法 (ε-greedy、Thompson 采样) 以及混合备用链
每周安装数
79
仓库
GitHub 星标数
93
首次出现
2026年1月25日
安全审计
安装于
gemini-cli: 67
claude-code: 67
codex: 64
opencode: 64
cursor: 64
github-copilot: 61
Build recommendation systems for personalized content and product suggestions.
| Approach | How It Works | Pros | Cons |
|---|---|---|---|
| Collaborative | User-item interactions | Discovers hidden patterns | Cold start |
| Content-based | Item features | Works for new items | Limited discovery |
| Hybrid | Combines both | Best of both | Complex |
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
class CollaborativeFilter:
    """Memory-based collaborative filter built on cosine similarity.

    ``fit`` computes user-user and item-item cosine similarities and keeps
    the interaction matrix so that ``recommend_for_user`` can score items
    and mask the ones the user has already interacted with.
    """

    def __init__(self):
        self.user_similarity = None
        self.item_similarity = None
        self.user_item_matrix = None

    def fit(self, user_item_matrix):
        """Compute similarities from *user_item_matrix*.

        Args:
            user_item_matrix: 2-D dense array or scipy sparse matrix;
                assumed rows = users, columns = items.
        """
        # Keep the interactions: recommend_for_user needs them both for
        # scoring and for masking already-seen items.
        self.user_item_matrix = user_item_matrix
        # User-based similarity
        self.user_similarity = cosine_similarity(user_item_matrix)
        # Item-based similarity
        self.item_similarity = cosine_similarity(user_item_matrix.T)

    def recommend_for_user(self, user_id, n=10):
        """Return up to *n* item indices for *user_id*, best first.

        Items the user already interacted with are never returned, so the
        result may be shorter than *n*.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.user_item_matrix is None or self.user_similarity is None:
            raise RuntimeError("call fit() before recommend_for_user()")

        similarity_row = np.asarray(self.user_similarity[user_id]).ravel()
        # matrix.T.dot(vector) yields a plain 1-D result for both dense
        # arrays and scipy sparse matrices (ndarray.dot(sparse) does not).
        scores = np.asarray(
            self.user_item_matrix.T.dot(similarity_row), dtype=float
        ).ravel()

        # nonzero() returns (cols,) for a dense 1-D row and (rows, cols) for
        # a sparse 2-D row; the last entry is the item index in both cases.
        already_interacted = self.user_item_matrix[user_id].nonzero()[-1]
        scores[already_interacted] = -np.inf

        ranked = np.argsort(scores)[::-1]
        # Drop masked items so they cannot pad the list when n is large.
        ranked = ranked[np.isfinite(scores[ranked])]
        return ranked[:max(n, 0)]
from sklearn.decomposition import TruncatedSVD
class MatrixFactorization:
    """Latent-factor model backed by scikit-learn's ``TruncatedSVD``."""

    def __init__(self, n_factors=50):
        """Create the underlying SVD with *n_factors* components."""
        self.svd = TruncatedSVD(n_components=n_factors)

    def fit(self, user_item_matrix):
        """Factorize *user_item_matrix* and store both factor matrices."""
        row_factors = self.svd.fit_transform(user_item_matrix)
        self.user_factors = row_factors
        # components_ is (n_components, n_columns); transpose it so that
        # each matrix column gets its own factor row.
        self.item_factors = self.svd.components_.T

    def predict(self, user_id, item_id):
        """Return the dot product of the user's and the item's factors."""
        user_vector = self.user_factors[user_id]
        item_vector = self.item_factors[item_id]
        return np.dot(user_vector, item_vector)
class HybridRecommender:
    """Blend collaborative and content-based scores with fixed weights.

    NOTE(review): both components must expose ``score(user_id)`` returning
    one score per item. ``ContentBasedFilter`` is not defined in this
    snippet, and the ``CollaborativeFilter`` shown alongside it has no
    ``score`` method - confirm which implementations are intended.
    """

    def __init__(self, collab_weight=0.7, content_weight=0.3):
        self.collab = CollaborativeFilter()
        self.content = ContentBasedFilter()
        self.weights = (collab_weight, content_weight)

    def recommend(self, user_id, n=10):
        """Return the indices of the top *n* items by weighted score.

        Returns an empty array when *n* is zero or negative.
        """
        collab_weight, content_weight = self.weights
        # asarray lets plain Python lists be scaled and added element-wise.
        collab_scores = np.asarray(self.collab.score(user_id), dtype=float)
        content_scores = np.asarray(self.content.score(user_id), dtype=float)
        combined = collab_weight * collab_scores + content_weight * content_scores
        # Reverse-then-slice instead of [-n:]: a slice starting at -0 would
        # return every item when n == 0.
        return np.argsort(combined)[::-1][:max(n, 0)]
from scipy.sparse import csr_matrix
import numpy as np
# 1. Prepare the user-item interaction matrix
# rows = users, cols = items, values = ratings/interactions
# Each entry is (user, item, rating). Append your own tuples here; a literal
# `...` placeholder inside the list would break the unpacking below.
ratings_data = [(0, 5, 5), (0, 10, 4), (1, 5, 3)]
n_users, n_items = 1000, 5000
row_idx = [r[0] for r in ratings_data]
col_idx = [r[1] for r in ratings_data]
ratings = [r[2] for r in ratings_data]
user_item_matrix = csr_matrix((ratings, (row_idx, col_idx)), shape=(n_users, n_items))
# 2. Choose and train a model
from recommendation_engine import ItemBasedCollaborativeFilter # See references
model = ItemBasedCollaborativeFilter(similarity_metric='cosine', k_neighbors=20)
model.fit(user_item_matrix)
# 3. Generate recommendations
recommendations = model.recommend(user_id=42, n=10)
print(recommendations) # [(item_id, score), ...]
# 4. Evaluate on a test set
from evaluation_metrics import precision_at_k, recall_at_k
test_items = {42: {10, 25, 30}} # True relevant items for user 42
rec_items = [item for item, score in recommendations]
precision = precision_at_k(rec_items, test_items[42], k=10)
recall = recall_at_k(rec_items, test_items[42], k=10)
print(f"Precision@10: {precision:.3f}, Recall@10: {recall:.3f}")
# 5. Handle cold start
from cold_start import PopularityRecommender
popularity_model = PopularityRecommender()
# NOTE(review): interactions_with_timestamps is not defined in this snippet;
# supply your own interaction log before running this step.
popularity_model.fit(interactions_with_timestamps)
new_user_recs = popularity_model.recommend(n=10)
Problem : Recommending only popular items, ignoring long tail. Reduces diversity and serendipity.
Solution : Balance popularity with personalization, apply re-ranking for diversity:
def diversify_recommendations(
    recommendations: "List[Tuple[int, float]]",
    item_features: np.ndarray,
    diversity_weight: float = 0.3,
) -> "List[Tuple[int, float]]":
    """Greedily re-rank to increase diversity while maintaining relevance.

    The most relevant item is taken first. Each later pick maximizes
    ``(1 - diversity_weight) * relevance + diversity_weight * d``, where
    ``d`` is the mean cosine distance to the items already selected.

    Args:
        recommendations: ``(item_index, relevance)`` pairs; item indices
            must be valid row indices into *item_features*.
        item_features: 2-D array with one feature row per item.
        diversity_weight: 0 keeps pure relevance order; larger values
            favor items unlike those already chosen.

    Returns:
        The same pairs, re-ordered. An empty input gives an empty list.
    """
    # Annotations are quoted so the def does not need typing.List/Tuple
    # to be in scope.
    if not recommendations:
        return []

    features = np.asarray(item_features, dtype=float)
    vectors = features[[item for item, _ in recommendations]]

    # Pairwise cosine distances among the candidates, computed once.
    # All-zero rows keep norm 1, so they sit at distance 1 from everything.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    unit = vectors / norms
    distances = np.clip(1.0 - unit @ unit.T, 0.0, 2.0)

    relevance = np.array([score for _, score in recommendations], dtype=float)
    remaining = list(range(len(recommendations)))

    # First item: highest score, without assuming the input is sorted.
    first = max(remaining, key=lambda idx: relevance[idx])
    remaining.remove(first)
    chosen = [first]

    while remaining:
        # Average distance from every remaining candidate to the chosen set.
        avg_distance = distances[np.ix_(remaining, chosen)].mean(axis=1)
        combined = (
            (1 - diversity_weight) * relevance[remaining]
            + diversity_weight * avg_distance
        )
        # argmax returns the first maximum, so ties keep the input order.
        chosen.append(remaining.pop(int(np.argmax(combined))))

    return [recommendations[idx] for idx in chosen]
Problem : Collaborative filtering fails when most users have rated <1% of items.
Solution : Use matrix factorization (SVD, ALS) instead of memory-based CF:
# ❌ Bad: User-based CF on sparse data (fails to find similar users)
# NOTE(review): UserBasedCollaborativeFilter and sparse_matrix are not defined
# in this snippet; presumably sparse_matrix is a users x items matrix - confirm.
user_cf = UserBasedCollaborativeFilter()
user_cf.fit(sparse_matrix) # Most users have <10 ratings
# ✅ Good: Matrix factorization handles sparsity
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=50)
# fit_transform returns one 50-component row per row of sparse_matrix.
user_factors = svd.fit_transform(sparse_matrix)
# components_ holds one row per component; transposing gives one row per
# column of sparse_matrix.
item_factors = svd.components_.T
# Predict rating: user_factors[u] @ item_factors[i]
Problem : Recommender crashes or returns empty results for new users/items.
Solution : Always implement fallback chain:
def _has_results(recs):
    """Return True when *recs* holds at least one recommendation."""
    if recs is None:
        return False
    try:
        return len(recs) > 0
    except TypeError:
        # Unsized iterables (e.g. generators) cannot be checked cheaply.
        return True


def recommend_with_fallback(user_id, n=10):
    """Graceful degradation through fallback chain.

    Tries personalized collaborative filtering, then demographic
    recommendations, then popularity. A tier is skipped when it raises or
    returns nothing, so only the popularity tier can yield an empty result.
    """
    try:
        # Try personalized recommendations
        if has_sufficient_history(user_id, min_interactions=5):
            recs = collaborative_filter.recommend(user_id, n)
            # An empty answer is as useless as a crash: keep falling back.
            if _has_results(recs):
                return recs
    except Exception as e:
        # Deliberately broad: any failure in this tier must degrade, not crash.
        logger.warning(f"CF failed for user {user_id}: {e}")
    # Fallback 1: Demographic-based
    try:
        if user_demographics_available(user_id):
            recs = demographic_recommender.recommend(user_id, n)
            if _has_results(recs):
                return recs
    except Exception as e:
        logger.warning(f"Demographic fallback failed for user {user_id}: {e}")
    # Fallback 2: Popularity
    return popularity_recommender.recommend(n)
Problem : Recommending items user already purchased/viewed wastes recommendation slots.
Solution : Always filter interacted items:
# ❌ Wrong: Forgetting to filter
recommendations = np.argsort(scores)[-n:][::-1] # May include already purchased!
# ✅ Correct: Exclude interacted items
# The correct variant comes last so that `recommendations` ends up holding
# the filtered result when the snippet is run top to bottom.
# nonzero() returns (cols,) for a dense 1-D row and (rows, cols) for a sparse
# 2-D row; the last entry is the item index in both cases.
user_items = user_item_matrix[user_id].nonzero()[-1]
scores[user_items] = -np.inf # Ensure they don't appear in top-K
recommendations = np.argsort(scores)[-n:][::-1]
Problem : Treating all clicks/views equally. 1 view ≠ 100 views.
Solution : Weight by interaction strength (view count, watch time, etc.):
# For implicit feedback, use confidence weighting
confidence_matrix = 1 + alpha * np.log(1 + interaction_counts)
# In ALS: C_ui * (P_ui - X_ui)²
# Higher confidence for items with more interactions
Problem : High prediction accuracy (RMSE) doesn't mean good top-K recommendations.
Solution : Use ranking metrics (NDCG, MAP@K):
# ❌ Bad: Only RMSE
# NOTE(review): y_true, y_pred, recommendations, relevance_scores,
# all_recommendations and ground_truth are not defined in this snippet;
# supply them from your own evaluation split.
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
# ✅ Good: Ranking metrics for top-K evaluation
# NOTE(review): the evaluation_metrics module is not shown here; confirm the
# import path and the signatures of the helpers below.
from evaluation_metrics import ndcg_at_k, mean_average_precision_at_k
# NDCG rewards putting highly relevant items first
ndcg = ndcg_at_k(recommendations, relevance_scores, k=10)
# MAP@K considers precision at each relevant item position
map_score = mean_average_precision_at_k(all_recommendations, ground_truth, k=10)
Problem : Always recommending similar items limits discovery, reduces user engagement over time.
Solution : Implement explore-exploit strategy:
class ExploreExploitRecommender:
    """Epsilon-style mix of model recommendations and exploratory items.

    NOTE(review): ``sample_diverse_items`` is not defined in this snippet;
    it is called with the number of exploratory items wanted and is
    expected to return an iterable of recommendations.
    """

    def __init__(self, base_model, epsilon=0.1):
        self.base_model = base_model
        self.epsilon = epsilon  # share of slots used for exploration

    def recommend(self, user_id, n=10):
        """Return a list of up to *n* recommendations.

        About ``n * (1 - epsilon)`` slots come from ``base_model``; the
        rest come from ``sample_diverse_items``. Returns ``[]`` when *n* is
        not positive.
        """
        if n <= 0:
            return []

        # Round before truncating: 10 * (1 - 0.9) is 0.9999999999999998 in
        # floating point and would otherwise truncate to 0 instead of 1.
        n_exploit = int(round(n * (1 - self.epsilon), 9))
        # Clamp so an out-of-range epsilon cannot produce negative counts.
        n_exploit = min(max(n_exploit, 0), n)
        n_explore = n - n_exploit

        results = []
        # Exploit: Use trained model for most recommendations
        if n_exploit > 0:
            results.extend(self.base_model.recommend(user_id, n=n_exploit))
        # Explore: Add random diverse items
        if n_explore > 0:
            results.extend(sample_diverse_items(n_explore))
        # extend() concatenates; `+` on two numpy arrays would add them
        # element-wise instead.
        return results
Load reference files when you need detailed implementations:
Collaborative Filtering : Load references/collaborative-filtering-deep-dive.md for complete user-based and item-based CF implementations with similarity metrics (cosine, Pearson, Jaccard), scalability optimizations (sparse matrices, approximate nearest neighbors), and handling edge cases (cold start, sparsity)
Matrix Factorization : Load references/matrix-factorization-methods.md for SVD, ALS, and NMF implementations with hyperparameter tuning, implicit feedback handling, and advanced techniques (BPR, WARP)
Evaluation Metrics : Load references/evaluation-metrics-implementation.md for Precision@K, Recall@K, NDCG, coverage, diversity metrics, cross-validation strategies, and statistical significance testing (paired t-test, bootstrap confidence intervals)
Cold Start Solutions : Load references/cold-start-strategies.md for new user/item strategies (popularity-based, onboarding, demographic, content-based bootstrapping, active learning), explore-exploit approaches (ε-greedy, Thompson sampling), and hybrid fallback chains
Weekly Installs
79
Repository
GitHub Stars
93
First Seen
Jan 25, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
gemini-cli: 67
claude-code: 67
codex: 64
opencode: 64
cursor: 64
github-copilot: 61
AI Elements:基于shadcn/ui的AI原生应用组件库,快速构建对话界面
69,600 周安装