sqlite-vec by existential-birds/beagle
npx skills add https://github.com/existential-birds/beagle --skill sqlite-vecsqlite-vec 是一个用于向量相似性搜索的轻量级 SQLite 扩展。它允许直接在 SQLite 数据库中存储和查询向量嵌入,无需外部向量数据库。
import sqlite3
import sqlite_vec
from sqlite_vec import serialize_float32
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
-- 创建表
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[4]
);
-- 插入向量(在 Python 中使用 serialize_float32())
INSERT INTO vec_items(rowid, embedding)
VALUES (1, X'CDCCCC3DCDCC4C3E9A99993E00008040');
-- KNN 查询
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH '[0.3, 0.3, 0.3, 0.3]'
AND k = 10
ORDER BY distance;
sqlite-vec 支持三种向量元素类型:
float[N] - 32 位浮点数(每个元素 4 字节)
float[1536] 用于 text-embedding-3-small广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
int8[N] - 8 位有符号整数(每个元素 1 字节)
bit[N] - 二进制向量(每个元素 1 位,打包成字节)
向量必须以二进制 BLOB 或 JSON 字符串形式提供。Python 辅助函数:
from sqlite_vec import serialize_float32, serialize_int8
import struct
# Float32 向量
vector = [0.1, 0.2, 0.3, 0.4]
blob = serialize_float32(vector)
# 等同于:struct.pack("%sf" % len(vector), *vector)
# Int8 向量
int_vector = [1, 2, 3, 4]
blob = serialize_int8(int_vector)
# 等同于:struct.pack("%sb" % len(int_vector), *int_vector)
NumPy 数组可以直接传递(必须转换为 float32):
import numpy as np
embedding = np.array([0.1, 0.2, 0.3, 0.4]).astype(np.float32)
db.execute("SELECT vec_length(?)", [embedding])
vec0 虚拟表是向量搜索的主要数据结构。
CREATE VIRTUAL TABLE vec_documents USING vec0(
document_id integer primary key,
contents_embedding float[768]
);
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[768] distance_metric=cosine
);
支持的度量:l2(默认)、cosine、hamming(仅限位向量)
vec0 表支持四种列类型:
包含所有列类型的示例:
CREATE VIRTUAL TABLE vec_knowledge_base USING vec0(
document_id integer primary key,
-- 分区键(分片)
organization_id integer partition key,
created_month text partition key,
-- 向量列
content_embedding float[768] distance_metric=cosine,
-- 元数据列(在 KNN 中可过滤)
document_type text,
language text,
word_count integer,
is_public boolean,
-- 辅助列(不可过滤)
+title text,
+full_content text,
+url text
);
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH ?
AND k = 10
ORDER BY distance;
关键组成部分:
WHERE embedding MATCH ? - 触发 KNN 查询AND k = 10 - 限制为 10 个最近邻ORDER BY distance - 按接近程度排序结果SELECT document_id, distance
FROM vec_movies
WHERE synopsis_embedding MATCH ?
AND k = 5
AND genre = 'scifi'
AND num_reviews BETWEEN 100 AND 500
AND mean_rating > 3.5
AND contains_violence = false
ORDER BY distance;
元数据支持的运算符:=、!=、>、>=、<、<=、BETWEEN
不支持:IS NULL、LIKE、GLOB、REGEXP、标量函数
SELECT document_id, distance
FROM vec_documents
WHERE contents_embedding MATCH ?
AND k = 20
AND user_id = 123 -- 分区键预过滤
ORDER BY distance;
分区键支持多租户或时间分片。最佳实践:
WITH knn_matches AS (
SELECT document_id, distance
FROM vec_documents
WHERE contents_embedding MATCH ?
AND k = 10
)
SELECT
documents.id,
documents.title,
knn_matches.distance
FROM knn_matches
LEFT JOIN documents ON documents.id = knn_matches.document_id
ORDER BY knn_matches.distance;
用于手动距离计算(非 vec0 表):
-- L2 距离
SELECT vec_distance_l2('[1, 2]', '[3, 4]');
-- 2.8284...
-- 余弦距离
SELECT vec_distance_cosine('[1, 1]', '[2, 2]');
-- ~0.0
-- 汉明距离(位向量)
SELECT vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'0F'));
-- 8
-- Float32
SELECT vec_f32('[.1, .2, .3, 4]'); -- 子类型 223
-- Int8
SELECT vec_int8('[1, 2, 3, 4]'); -- 子类型 225
-- Bit
SELECT vec_bit(X'F0'); -- 子类型 224
-- 获取长度
SELECT vec_length('[1, 2, 3]'); -- 3
-- 获取类型
SELECT vec_type(vec_int8('[1, 2]')); -- 'int8'
-- 转换为 JSON
SELECT vec_to_json(vec_f32('[1, 2]')); -- '[1.000000,2.000000]'
-- 向量相加
SELECT vec_to_json(
vec_add('[.1, .2, .3]', '[.4, .5, .6]')
);
-- '[0.500000,0.700000,0.900000]'
-- 向量相减
SELECT vec_to_json(
vec_sub('[.1, .2, .3]', '[.4, .5, .6]')
);
-- '[-0.300000,-0.300000,-0.300000]'
-- 归一化(L2 范数)
SELECT vec_to_json(
vec_normalize('[2, 3, 1, -4]')
);
-- '[0.365148,0.547723,0.182574,-0.730297]'
-- 切片(用于 Matryoshka 嵌入)
SELECT vec_to_json(
vec_slice('[1, 2, 3, 4]', 0, 2)
);
-- '[1.000000,2.000000]'
-- Matryoshka 模式:先切片后归一化
SELECT vec_normalize(vec_slice(embedding, 0, 256))
FROM vec_items;
-- 二进制量化(正数→1,负数→0)
SELECT vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
-- X'0F'
-- 可视化
SELECT vec_to_json(
vec_quantize_binary('[1, 2, -3, 4, -5, 6, -7, 8]')
);
-- '[0,1,0,0,1,0,1,0]'
-- 遍历元素
SELECT rowid, value
FROM vec_each('[1, 2, 3, 4]');
/*
┌───────┬───────┐
│ rowid │ value │
├───────┼───────┤
│ 0 │ 1 │
│ 1 │ 2 │
│ 2 │ 3 │
│ 3 │ 4 │
└───────┴───────┘
*/
import sqlite3
import sqlite_vec
from sqlite_vec import serialize_float32
# 设置
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
# 创建表
db.execute("""
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[4]
)
""")
# 插入向量
items = [
(1, [0.1, 0.1, 0.1, 0.1]),
(2, [0.2, 0.2, 0.2, 0.2]),
(3, [0.3, 0.3, 0.3, 0.3])
]
with db:
for rowid, vector in items:
db.execute(
"INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)",
[rowid, serialize_float32(vector)]
)
# 查询
query = [0.25, 0.25, 0.25, 0.25]
results = db.execute(
"""
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH ?
AND k = 2
ORDER BY distance
""",
[serialize_float32(query)]
).fetchall()
for rowid, distance in results:
print(f"rowid={rowid}, distance={distance}")
from openai import OpenAI
from sqlite_vec import serialize_float32
client = OpenAI()
# 生成嵌入
response = client.embeddings.create(
input="your text here",
model="text-embedding-3-small"
)
embedding = response.data[0].embedding
# 存储在 sqlite-vec 中
db.execute(
"INSERT INTO vec_documents(id, embedding) VALUES(?, ?)",
[doc_id, serialize_float32(embedding)]
)
# 查询
query_embedding = client.embeddings.create(
input="search query",
model="text-embedding-3-small"
).data[0].embedding
results = db.execute(
"""
SELECT id, distance
FROM vec_documents
WHERE embedding MATCH ?
AND k = 10
""",
[serialize_float32(query_embedding)]
).fetchall()
CREATE VIRTUAL TABLE vec_docs USING vec0(
doc_id integer primary key,
user_id integer partition key,
embedding float[768]
);
SELECT doc_id, distance
FROM vec_docs
WHERE embedding MATCH ? AND k = 10 AND user_id = 123;
SELECT product_id, distance
FROM vec_products
WHERE embedding MATCH ?
AND k = 20
AND category = 'electronics'
AND price < 1000.0
ORDER BY distance;
-- 自适应维度:先切片后归一化
SELECT vec_normalize(vec_slice(embedding, 0, 256))
FROM vec_items;
每周安装量
93
仓库
GitHub 星标数
41
首次出现
2026年1月20日
安全审计
安装于
opencode79
gemini-cli77
codex76
cursor74
github-copilot68
cline60
sqlite-vec is a lightweight SQLite extension for vector similarity search. It enables storing and querying vector embeddings directly in SQLite databases without external vector databases.
import sqlite3
import sqlite_vec
from sqlite_vec import serialize_float32
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
-- Create table
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[4]
);
-- Insert vectors (use serialize_float32() in Python)
INSERT INTO vec_items(rowid, embedding)
VALUES (1, X'CDCCCC3DCDCC4C3E9A99993E00008040');
-- KNN query
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH '[0.3, 0.3, 0.3, 0.3]'
AND k = 10
ORDER BY distance;
sqlite-vec supports three vector element types:
float[N] - 32-bit floating point (4 bytes per element)
float[1536] for text-embedding-3-smallint8[N] - 8-bit signed integers (1 byte per element)
bit[N] - Binary vectors (1 bit per element, packed into bytes)
Vectors must be provided as binary BLOBs or JSON strings. Python helper functions:
from sqlite_vec import serialize_float32, serialize_int8
import struct
# Float32 vectors
vector = [0.1, 0.2, 0.3, 0.4]
blob = serialize_float32(vector)
# Equivalent to: struct.pack("%sf" % len(vector), *vector)
# Int8 vectors
int_vector = [1, 2, 3, 4]
blob = serialize_int8(int_vector)
# Equivalent to: struct.pack("%sb" % len(int_vector), *int_vector)
NumPy arrays can be passed directly (must cast to float32):
import numpy as np
embedding = np.array([0.1, 0.2, 0.3, 0.4]).astype(np.float32)
db.execute("SELECT vec_length(?)", [embedding])
The vec0 virtual table is the primary data structure for vector search.
CREATE VIRTUAL TABLE vec_documents USING vec0(
document_id integer primary key,
contents_embedding float[768]
);
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[768] distance_metric=cosine
);
Supported metrics: l2 (default), cosine, hamming (bit vectors only)
vec0 tables support four column types:
Example with all column types:
CREATE VIRTUAL TABLE vec_knowledge_base USING vec0(
document_id integer primary key,
-- Partition keys (sharding)
organization_id integer partition key,
created_month text partition key,
-- Vector column
content_embedding float[768] distance_metric=cosine,
-- Metadata columns (filterable in KNN)
document_type text,
language text,
word_count integer,
is_public boolean,
-- Auxiliary columns (not filterable)
+title text,
+full_content text,
+url text
);
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH ?
AND k = 10
ORDER BY distance;
Key components:
WHERE embedding MATCH ? - Triggers KNN queryAND k = 10 - Limit to 10 nearest neighborsORDER BY distance - Sort results by proximitySELECT document_id, distance
FROM vec_movies
WHERE synopsis_embedding MATCH ?
AND k = 5
AND genre = 'scifi'
AND num_reviews BETWEEN 100 AND 500
AND mean_rating > 3.5
AND contains_violence = false
ORDER BY distance;
Supported operators on metadata: =, !=, >, >=, <, <=, BETWEEN
Not supported: IS NULL, LIKE, GLOB, REGEXP, scalar functions
SELECT document_id, distance
FROM vec_documents
WHERE contents_embedding MATCH ?
AND k = 20
AND user_id = 123 -- Partition key pre-filters
ORDER BY distance;
Partition keys enable multi-tenant or temporal sharding. Best practices:
WITH knn_matches AS (
SELECT document_id, distance
FROM vec_documents
WHERE contents_embedding MATCH ?
AND k = 10
)
SELECT
documents.id,
documents.title,
knn_matches.distance
FROM knn_matches
LEFT JOIN documents ON documents.id = knn_matches.document_id
ORDER BY knn_matches.distance;
For manual distance calculations (non-vec0 tables):
-- L2 distance
SELECT vec_distance_l2('[1, 2]', '[3, 4]');
-- 2.8284...
-- Cosine distance
SELECT vec_distance_cosine('[1, 1]', '[2, 2]');
-- ~0.0
-- Hamming distance (bit vectors)
SELECT vec_distance_hamming(vec_bit(X'F0'), vec_bit(X'0F'));
-- 8
-- Float32
SELECT vec_f32('[.1, .2, .3, 4]'); -- Subtype 223
-- Int8
SELECT vec_int8('[1, 2, 3, 4]'); -- Subtype 225
-- Bit
SELECT vec_bit(X'F0'); -- Subtype 224
-- Get length
SELECT vec_length('[1, 2, 3]'); -- 3
-- Get type
SELECT vec_type(vec_int8('[1, 2]')); -- 'int8'
-- Convert to JSON
SELECT vec_to_json(vec_f32('[1, 2]')); -- '[1.000000,2.000000]'
-- Add vectors
SELECT vec_to_json(
vec_add('[.1, .2, .3]', '[.4, .5, .6]')
);
-- '[0.500000,0.700000,0.900000]'
-- Subtract vectors
SELECT vec_to_json(
vec_sub('[.1, .2, .3]', '[.4, .5, .6]')
);
-- '[-0.300000,-0.300000,-0.300000]'
-- Normalize (L2 norm)
SELECT vec_to_json(
vec_normalize('[2, 3, 1, -4]')
);
-- '[0.365148,0.547723,0.182574,-0.730297]'
-- Slice (for Matryoshka embeddings)
SELECT vec_to_json(
vec_slice('[1, 2, 3, 4]', 0, 2)
);
-- '[1.000000,2.000000]'
-- Matryoshka pattern: slice then normalize
SELECT vec_normalize(vec_slice(embedding, 0, 256))
FROM vec_items;
-- Binary quantization (positive→1, negative→0)
SELECT vec_quantize_binary('[1, 2, 3, 4, -5, -6, -7, -8]');
-- X'0F'
-- Visualize
SELECT vec_to_json(
vec_quantize_binary('[1, 2, -3, 4, -5, 6, -7, 8]')
);
-- '[0,1,0,0,1,0,1,0]'
-- Iterate through elements
SELECT rowid, value
FROM vec_each('[1, 2, 3, 4]');
/*
┌───────┬───────┐
│ rowid │ value │
├───────┼───────┤
│ 0 │ 1 │
│ 1 │ 2 │
│ 2 │ 3 │
│ 3 │ 4 │
└───────┴───────┘
*/
import sqlite3
import sqlite_vec
from sqlite_vec import serialize_float32
# Setup
db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)
# Create table
db.execute("""
CREATE VIRTUAL TABLE vec_items USING vec0(
embedding float[4]
)
""")
# Insert vectors
items = [
(1, [0.1, 0.1, 0.1, 0.1]),
(2, [0.2, 0.2, 0.2, 0.2]),
(3, [0.3, 0.3, 0.3, 0.3])
]
with db:
for rowid, vector in items:
db.execute(
"INSERT INTO vec_items(rowid, embedding) VALUES (?, ?)",
[rowid, serialize_float32(vector)]
)
# Query
query = [0.25, 0.25, 0.25, 0.25]
results = db.execute(
"""
SELECT rowid, distance
FROM vec_items
WHERE embedding MATCH ?
AND k = 2
ORDER BY distance
""",
[serialize_float32(query)]
).fetchall()
for rowid, distance in results:
print(f"rowid={rowid}, distance={distance}")
from openai import OpenAI
from sqlite_vec import serialize_float32
client = OpenAI()
# Generate embedding
response = client.embeddings.create(
input="your text here",
model="text-embedding-3-small"
)
embedding = response.data[0].embedding
# Store in sqlite-vec
db.execute(
"INSERT INTO vec_documents(id, embedding) VALUES(?, ?)",
[doc_id, serialize_float32(embedding)]
)
# Query
query_embedding = client.embeddings.create(
input="search query",
model="text-embedding-3-small"
).data[0].embedding
results = db.execute(
"""
SELECT id, distance
FROM vec_documents
WHERE embedding MATCH ?
AND k = 10
""",
[serialize_float32(query_embedding)]
).fetchall()
CREATE VIRTUAL TABLE vec_docs USING vec0(
doc_id integer primary key,
user_id integer partition key,
embedding float[768]
);
SELECT doc_id, distance
FROM vec_docs
WHERE embedding MATCH ? AND k = 10 AND user_id = 123;
SELECT product_id, distance
FROM vec_products
WHERE embedding MATCH ?
AND k = 20
AND category = 'electronics'
AND price < 1000.0
ORDER BY distance;
-- Adaptive dimensions: slice then normalize
SELECT vec_normalize(vec_slice(embedding, 0, 256))
FROM vec_items;
Weekly Installs
93
Repository
GitHub Stars
41
First Seen
Jan 20, 2026
Security Audits
Gen Agent Trust HubPassSocketPassSnykPass
Installed on
opencode79
gemini-cli77
codex76
cursor74
github-copilot68
cline60
AI 代码实施计划编写技能 | 自动化开发任务分解与 TDD 流程规划工具
49,000 周安装