qdrant-vector-search by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill qdrant-vector-search
使用 Rust 编写的高性能向量数据库,适用于生产级 RAG 和语义搜索。
在以下情况下使用 Qdrant:
主要特性:
考虑使用替代方案的情况:
# Python 客户端
pip install qdrant-client
# Docker(推荐用于开发)
docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
# Docker 带持久化存储
docker run -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage \
qdrant/qdrant
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
# 连接到 Qdrant
client = QdrantClient(host="localhost", port=6333)
# 创建集合
client.create_collection(
collection_name="documents",
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
# 插入带有效载荷的向量
client.upsert(
collection_name="documents",
points=[
PointStruct(
id=1,
vector=[0.1, 0.2, ...], # 384 维向量
payload={"title": "文档 1", "category": "tech"}
),
PointStruct(
id=2,
vector=[0.3, 0.4, ...],
payload={"title": "文档 2", "category": "science"}
)
]
)
# 带过滤的搜索
results = client.search(
collection_name="documents",
query_vector=[0.15, 0.25, ...],
query_filter={
"must": [{"key": "category", "match": {"value": "tech"}}]
},
limit=10
)
for point in results:
print(f"ID: {point.id}, 分数: {point.score}, 有效载荷: {point.payload}")
from qdrant_client.models import PointStruct
# 点 = ID + 向量(们) + 有效载荷
point = PointStruct(
id=123, # 整数或 UUID 字符串
vector=[0.1, 0.2, 0.3, ...], # 稠密向量
payload={ # 任意 JSON 元数据
"title": "文档标题",
"category": "tech",
"timestamp": 1699900000,
"tags": ["python", "ml"]
}
)
# 批量 upsert(推荐)
client.upsert(
collection_name="documents",
points=[point1, point2, point3],
wait=True # 等待索引完成
)
from qdrant_client.models import VectorParams, Distance, HnswConfigDiff
# 使用 HNSW 配置创建
client.create_collection(
collection_name="documents",
vectors_config=VectorParams(
size=384, # 向量维度
distance=Distance.COSINE # COSINE, EUCLID, DOT, MANHATTAN
),
hnsw_config=HnswConfigDiff(
m=16, # 每个节点的连接数(默认 16)
ef_construct=100, # 构建时的准确度(默认 100)
full_scan_threshold=10000 # 低于此阈值时切换到暴力搜索
),
on_disk_payload=True # 将有效载荷存储在磁盘上
)
# 集合信息
info = client.get_collection("documents")
print(f"点数: {info.points_count}, 向量数: {info.vectors_count}")
| 度量标准 | 使用场景 | 范围 |
|---|---|---|
| COSINE | 文本嵌入,归一化向量 | 0 到 2 |
| EUCLID | 空间数据,图像特征 | 0 到 ∞ |
| DOT | 推荐,未归一化向量 | -∞ 到 ∞ |
| MANHATTAN | 稀疏特征,离散数据 | 0 到 ∞ |
# 简单的最近邻搜索
results = client.search(
collection_name="documents",
query_vector=[0.1, 0.2, ...],
limit=10,
with_payload=True,
with_vectors=False # 不返回向量(更快)
)
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range
# 复杂过滤
results = client.search(
collection_name="documents",
query_vector=query_embedding,
query_filter=Filter(
must=[
FieldCondition(key="category", match=MatchValue(value="tech")),
FieldCondition(key="timestamp", range=Range(gte=1699000000))
],
must_not=[
FieldCondition(key="status", match=MatchValue(value="archived"))
]
),
limit=10
)
# 简写过滤语法
results = client.search(
collection_name="documents",
query_vector=query_embedding,
query_filter={
"must": [
{"key": "category", "match": {"value": "tech"}},
{"key": "price", "range": {"gte": 10, "lte": 100}}
]
},
limit=10
)
from qdrant_client.models import SearchRequest
# 单个请求中的多个查询
results = client.search_batch(
collection_name="documents",
requests=[
SearchRequest(vector=[0.1, ...], limit=5),
SearchRequest(vector=[0.2, ...], limit=5, filter={"must": [...]}),
SearchRequest(vector=[0.3, ...], limit=10)
]
)
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
# 初始化
encoder = SentenceTransformer("all-MiniLM-L6-v2")
client = QdrantClient(host="localhost", port=6333)
# 创建集合
client.create_collection(
collection_name="knowledge_base",
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
# 索引文档
documents = [
{"id": 1, "text": "Python 是一种编程语言", "source": "wiki"},
{"id": 2, "text": "机器学习使用算法", "source": "textbook"},
]
points = [
PointStruct(
id=doc["id"],
vector=encoder.encode(doc["text"]).tolist(),
payload={"text": doc["text"], "source": doc["source"]}
)
for doc in documents
]
client.upsert(collection_name="knowledge_base", points=points)
# RAG 检索
def retrieve(query: str, top_k: int = 5) -> list[dict]:
    """Return the closest "knowledge_base" matches for *query*.

    The query text is encoded with the module-level ``encoder`` and the
    resulting vector is passed to ``client.search`` with ``limit=top_k``.

    Args:
        query: Text to embed and search for.
        top_k: Value passed as the search ``limit``.

    Returns:
        One ``{"text": ..., "score": ...}`` dict per hit, in the order the
        search returned them.
    """
    embedding = encoder.encode(query).tolist()
    hits = client.search(
        collection_name="knowledge_base",
        query_vector=embedding,
        limit=top_k,
    )
    matches = []
    for hit in hits:
        # Keep only the stored text and the similarity score of each hit.
        matches.append({"text": hit.payload["text"], "score": hit.score})
    return matches
# 在 RAG 管道中使用
context = retrieve("什么是 Python?")
prompt = f"上下文: {context}\n\n问题: 什么是 Python?"
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Qdrant.from_documents(documents, embeddings, url="http://localhost:6333", collection_name="docs")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, StorageContext
vector_store = QdrantVectorStore(client=client, collection_name="llama_docs")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine()
from qdrant_client.models import VectorParams, Distance
# 具有多种向量类型的集合
client.create_collection(
collection_name="hybrid_search",
vectors_config={
"dense": VectorParams(size=384, distance=Distance.COSINE),
"sparse": VectorParams(size=30000, distance=Distance.DOT)
}
)
# 使用命名向量插入
client.upsert(
collection_name="hybrid_search",
points=[
PointStruct(
id=1,
vector={
"dense": dense_embedding,
"sparse": sparse_embedding
},
payload={"text": "文档文本"}
)
]
)
# 搜索特定向量
results = client.search(
collection_name="hybrid_search",
query_vector=("dense", query_dense), # 指定使用哪个向量
limit=10
)
from qdrant_client.models import SparseVectorParams, SparseIndexParams, SparseVector
# 带有稀疏向量的集合
client.create_collection(
collection_name="sparse_search",
vectors_config={},
sparse_vectors_config={"text": SparseVectorParams(index=SparseIndexParams(on_disk=False))}
)
# 插入稀疏向量
client.upsert(
collection_name="sparse_search",
points=[PointStruct(id=1, vector={"text": SparseVector(indices=[1, 5, 100], values=[0.5, 0.8, 0.2])}, payload={"text": "document"})]
)
from qdrant_client.models import ScalarQuantization, ScalarQuantizationConfig, ScalarType
# 标量量化(减少 4 倍内存)
client.create_collection(
collection_name="quantized",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ScalarQuantization(
scalar=ScalarQuantizationConfig(
type=ScalarType.INT8,
quantile=0.99, # 裁剪异常值
always_ram=True # 将量化数据保留在 RAM 中
)
)
)
# 带重新评分的搜索
results = client.search(
collection_name="quantized",
query_vector=query,
search_params={"quantization": {"rescore": True}}, # 对顶部结果重新评分
limit=10
)
from qdrant_client.models import PayloadSchemaType
# 为更快的过滤创建有效载荷索引
client.create_payload_index(
collection_name="documents",
field_name="category",
field_schema=PayloadSchemaType.KEYWORD
)
client.create_payload_index(
collection_name="documents",
field_name="timestamp",
field_schema=PayloadSchemaType.INTEGER
)
# 索引类型: KEYWORD, INTEGER, FLOAT, GEO, TEXT (全文), BOOL
from qdrant_client import QdrantClient
# 连接到 Qdrant Cloud
client = QdrantClient(
url="https://your-cluster.cloud.qdrant.io",
api_key="your-api-key"
)
# 为搜索速度优化(更高召回率)
client.update_collection(
collection_name="documents",
hnsw_config=HnswConfigDiff(ef_construct=200, m=32)
)
# 为索引速度优化(批量加载)
client.update_collection(
collection_name="documents",
optimizer_config={"indexing_threshold": 20000}
)
对大型有效载荷使用 on_disk_payload
带过滤的搜索速度慢:
# 为过滤字段创建有效载荷索引
client.create_payload_index(
collection_name="docs",
field_name="category",
field_schema=PayloadSchemaType.KEYWORD
)
内存不足:
# 启用量化和磁盘存储
client.create_collection(
collection_name="large_collection",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ScalarQuantization(...),
on_disk_payload=True
)
连接问题:
# 使用超时和重试
client = QdrantClient(
host="localhost",
port=6333,
timeout=30,
prefer_grpc=True # 使用 gRPC 以获得更好的性能
)
每周安装量
323
代码仓库
GitHub Stars
22.6K
首次出现
Jan 21, 2026
安全审计
安装于
opencode: 272
gemini-cli: 261
codex: 245
cursor: 242
claude-code: 234
github-copilot: 232
High-performance vector database written in Rust for production RAG and semantic search.
Use Qdrant when:
Key features:
Use alternatives instead:
# Python client
pip install qdrant-client
# Docker (recommended for development)
docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
# Docker with persistent storage
docker run -p 6333:6333 -p 6334:6334 \
-v $(pwd)/qdrant_storage:/qdrant/storage \
qdrant/qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
# Connect to Qdrant
client = QdrantClient(host="localhost", port=6333)
# Create collection
client.create_collection(
collection_name="documents",
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
# Insert vectors with payload
client.upsert(
collection_name="documents",
points=[
PointStruct(
id=1,
vector=[0.1, 0.2, ...], # 384-dim vector
payload={"title": "Doc 1", "category": "tech"}
),
PointStruct(
id=2,
vector=[0.3, 0.4, ...],
payload={"title": "Doc 2", "category": "science"}
)
]
)
# Search with filtering
results = client.search(
collection_name="documents",
query_vector=[0.15, 0.25, ...],
query_filter={
"must": [{"key": "category", "match": {"value": "tech"}}]
},
limit=10
)
for point in results:
print(f"ID: {point.id}, Score: {point.score}, Payload: {point.payload}")
from qdrant_client.models import PointStruct
# Point = ID + Vector(s) + Payload
point = PointStruct(
id=123, # Integer or UUID string
vector=[0.1, 0.2, 0.3, ...], # Dense vector
payload={ # Arbitrary JSON metadata
"title": "Document title",
"category": "tech",
"timestamp": 1699900000,
"tags": ["python", "ml"]
}
)
# Batch upsert (recommended)
client.upsert(
collection_name="documents",
points=[point1, point2, point3],
wait=True # Wait for indexing
)
from qdrant_client.models import VectorParams, Distance, HnswConfigDiff
# Create with HNSW configuration
client.create_collection(
collection_name="documents",
vectors_config=VectorParams(
size=384, # Vector dimensions
distance=Distance.COSINE # COSINE, EUCLID, DOT, MANHATTAN
),
hnsw_config=HnswConfigDiff(
m=16, # Connections per node (default 16)
ef_construct=100, # Build-time accuracy (default 100)
full_scan_threshold=10000 # Switch to brute force below this
),
on_disk_payload=True # Store payload on disk
)
# Collection info
info = client.get_collection("documents")
print(f"Points: {info.points_count}, Vectors: {info.vectors_count}")
| Metric | Use Case | Range |
|---|---|---|
| COSINE | Text embeddings, normalized vectors | 0 to 2 |
| EUCLID | Spatial data, image features | 0 to ∞ |
| DOT | Recommendations, unnormalized | -∞ to ∞ |
| MANHATTAN | Sparse features, discrete data | 0 to ∞ |
# Simple nearest neighbor search
results = client.search(
collection_name="documents",
query_vector=[0.1, 0.2, ...],
limit=10,
with_payload=True,
with_vectors=False # Don't return vectors (faster)
)
from qdrant_client.models import Filter, FieldCondition, MatchValue, Range
# Complex filtering
results = client.search(
collection_name="documents",
query_vector=query_embedding,
query_filter=Filter(
must=[
FieldCondition(key="category", match=MatchValue(value="tech")),
FieldCondition(key="timestamp", range=Range(gte=1699000000))
],
must_not=[
FieldCondition(key="status", match=MatchValue(value="archived"))
]
),
limit=10
)
# Shorthand filter syntax
results = client.search(
collection_name="documents",
query_vector=query_embedding,
query_filter={
"must": [
{"key": "category", "match": {"value": "tech"}},
{"key": "price", "range": {"gte": 10, "lte": 100}}
]
},
limit=10
)
from qdrant_client.models import SearchRequest
# Multiple queries in one request
results = client.search_batch(
collection_name="documents",
requests=[
SearchRequest(vector=[0.1, ...], limit=5),
SearchRequest(vector=[0.2, ...], limit=5, filter={"must": [...]}),
SearchRequest(vector=[0.3, ...], limit=10)
]
)
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, PointStruct
# Initialize
encoder = SentenceTransformer("all-MiniLM-L6-v2")
client = QdrantClient(host="localhost", port=6333)
# Create collection
client.create_collection(
collection_name="knowledge_base",
vectors_config=VectorParams(size=384, distance=Distance.COSINE)
)
# Index documents
documents = [
{"id": 1, "text": "Python is a programming language", "source": "wiki"},
{"id": 2, "text": "Machine learning uses algorithms", "source": "textbook"},
]
points = [
PointStruct(
id=doc["id"],
vector=encoder.encode(doc["text"]).tolist(),
payload={"text": doc["text"], "source": doc["source"]}
)
for doc in documents
]
client.upsert(collection_name="knowledge_base", points=points)
# RAG retrieval
def retrieve(query: str, top_k: int = 5) -> list[dict]:
    """Return the closest "knowledge_base" matches for *query*.

    The query text is encoded with the module-level ``encoder`` and the
    resulting vector is passed to ``client.search`` with ``limit=top_k``.

    Args:
        query: Text to embed and search for.
        top_k: Value passed as the search ``limit``.

    Returns:
        One ``{"text": ..., "score": ...}`` dict per hit, in the order the
        search returned them.
    """
    embedding = encoder.encode(query).tolist()
    hits = client.search(
        collection_name="knowledge_base",
        query_vector=embedding,
        limit=top_k,
    )
    matches = []
    for hit in hits:
        # Keep only the stored text and the similarity score of each hit.
        matches.append({"text": hit.payload["text"], "score": hit.score})
    return matches
# Use in RAG pipeline
context = retrieve("What is Python?")
prompt = f"Context: {context}\n\nQuestion: What is Python?"
from langchain_community.vectorstores import Qdrant
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Qdrant.from_documents(documents, embeddings, url="http://localhost:6333", collection_name="docs")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, StorageContext
vector_store = QdrantVectorStore(client=client, collection_name="llama_docs")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
query_engine = index.as_query_engine()
from qdrant_client.models import VectorParams, Distance
# Collection with multiple vector types
client.create_collection(
collection_name="hybrid_search",
vectors_config={
"dense": VectorParams(size=384, distance=Distance.COSINE),
"sparse": VectorParams(size=30000, distance=Distance.DOT)
}
)
# Insert with named vectors
client.upsert(
collection_name="hybrid_search",
points=[
PointStruct(
id=1,
vector={
"dense": dense_embedding,
"sparse": sparse_embedding
},
payload={"text": "document text"}
)
]
)
# Search specific vector
results = client.search(
collection_name="hybrid_search",
query_vector=("dense", query_dense), # Specify which vector
limit=10
)
from qdrant_client.models import SparseVectorParams, SparseIndexParams, SparseVector
# Collection with sparse vectors
client.create_collection(
collection_name="sparse_search",
vectors_config={},
sparse_vectors_config={"text": SparseVectorParams(index=SparseIndexParams(on_disk=False))}
)
# Insert sparse vector
client.upsert(
collection_name="sparse_search",
points=[PointStruct(id=1, vector={"text": SparseVector(indices=[1, 5, 100], values=[0.5, 0.8, 0.2])}, payload={"text": "document"})]
)
from qdrant_client.models import ScalarQuantization, ScalarQuantizationConfig, ScalarType
# Scalar quantization (4x memory reduction)
client.create_collection(
collection_name="quantized",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ScalarQuantization(
scalar=ScalarQuantizationConfig(
type=ScalarType.INT8,
quantile=0.99, # Clip outliers
always_ram=True # Keep quantized in RAM
)
)
)
# Search with rescoring
results = client.search(
collection_name="quantized",
query_vector=query,
search_params={"quantization": {"rescore": True}}, # Rescore top results
limit=10
)
from qdrant_client.models import PayloadSchemaType
# Create payload index for faster filtering
client.create_payload_index(
collection_name="documents",
field_name="category",
field_schema=PayloadSchemaType.KEYWORD
)
client.create_payload_index(
collection_name="documents",
field_name="timestamp",
field_schema=PayloadSchemaType.INTEGER
)
# Index types: KEYWORD, INTEGER, FLOAT, GEO, TEXT (full-text), BOOL
from qdrant_client import QdrantClient
# Connect to Qdrant Cloud
client = QdrantClient(
url="https://your-cluster.cloud.qdrant.io",
api_key="your-api-key"
)
# Optimize for search speed (higher recall)
client.update_collection(
collection_name="documents",
hnsw_config=HnswConfigDiff(ef_construct=200, m=32)
)
# Optimize for indexing speed (bulk loads)
client.update_collection(
collection_name="documents",
optimizer_config={"indexing_threshold": 20000}
)
Use on_disk_payload for large payloads
Slow search with filters:
# Create payload index for filtered fields
client.create_payload_index(
collection_name="docs",
field_name="category",
field_schema=PayloadSchemaType.KEYWORD
)
Out of memory:
# Enable quantization and on-disk storage
client.create_collection(
collection_name="large_collection",
vectors_config=VectorParams(size=384, distance=Distance.COSINE),
quantization_config=ScalarQuantization(...),
on_disk_payload=True
)
Connection issues:
# Set an explicit timeout and prefer gRPC (this snippet does not configure retries)
client = QdrantClient(
host="localhost",
port=6333,
timeout=30,
prefer_grpc=True # gRPC for better performance
)
Weekly Installs
323
Repository
GitHub Stars
22.6K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass
Installed on
opencode: 272
gemini-cli: 261
codex: 245
cursor: 242
claude-code: 234
github-copilot: 232
超能力技能使用指南:AI助手技能调用优先级与工作流程详解
39,200 周安装