llamaindex by davila7/claude-code-templates
npx skills add https://github.com/davila7/claude-code-templates --skill llamaindex
连接大型语言模型与您的数据的领先框架。
在以下场景中使用 LlamaIndex:
指标:
替代方案:
# 入门包(推荐)
pip install llama-index
# 或最小化核心包 + 特定集成
pip install llama-index-core
pip install llama-index-llms-openai
pip install llama-index-embeddings-openai
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# 加载文档
documents = SimpleDirectoryReader("data").load_data()
# 创建索引
index = VectorStoreIndex.from_documents(documents)
# 查询
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
import os

from llama_index.core import SimpleDirectoryReader, Document
from llama_index.readers.web import SimpleWebPageReader
from llama_index.readers.github import GithubClient, GithubRepositoryReader

# Directory of files
documents = SimpleDirectoryReader("./data").load_data()

# Web pages
reader = SimpleWebPageReader()
documents = reader.load_data(["https://example.com"])

# GitHub repository: the reader needs a GithubClient to talk to the API.
# The access token is read from the GITHUB_TOKEN environment variable.
github_client = GithubClient(github_token=os.environ["GITHUB_TOKEN"])
reader = GithubRepositoryReader(
    github_client=github_client,
    owner="user",
    repo="repo",
)
documents = reader.load_data(branch="main")

# Manual document creation
doc = Document(
    text="This is the document content",
    metadata={"source": "manual", "date": "2025-01-01"},
)
from llama_index.core import VectorStoreIndex, ListIndex, TreeIndex
# 向量索引(最常用 - 语义搜索)
vector_index = VectorStoreIndex.from_documents(documents)
# 列表索引(顺序扫描)
list_index = ListIndex.from_documents(documents)
# 树状索引(分层摘要)
tree_index = TreeIndex.from_documents(documents)
# 保存索引
index.storage_context.persist(persist_dir="./storage")
# 加载索引
from llama_index.core import load_index_from_storage, StorageContext
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
# 基础查询
query_engine = index.as_query_engine()
response = query_engine.query("What is the main topic?")
print(response)
# 流式响应
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain quantum computing")
for text in response.response_gen:
print(text, end="", flush=True)
# 自定义配置
query_engine = index.as_query_engine(
similarity_top_k=3, # 返回前3个片段
response_mode="compact", # 或 "tree_summarize", "simple_summarize"
verbose=True
)
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters

# Vector retriever
retriever = index.as_retriever(similarity_top_k=5)
nodes = retriever.retrieve("machine learning")

# With filtering: `filters` takes a MetadataFilters object rather than a
# plain dict; each filter key is the metadata field name.
category_filter = MetadataFilters(
    filters=[ExactMatchFilter(key="category", value="tutorial")]
)
retriever = index.as_retriever(
    similarity_top_k=3,
    filters=category_filter,
)
# Custom retriever
from llama_index.core.retrievers import BaseRetriever


class CustomRetriever(BaseRetriever):
    """Skeleton for a retriever with user-defined retrieval logic."""

    def _retrieve(self, query_bundle):
        # Your custom retrieval logic: fill `nodes` with the results for
        # `query_bundle`. It is bound locally so the method does not depend
        # on a module-level variable of the same name.
        nodes = []
        return nodes
import asyncio

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI


# Define tools
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


# Create agent: FunctionAgent is configured through its constructor and
# accepts plain Python functions as tools.
llm = OpenAI(model="gpt-4o")
agent = FunctionAgent(
    tools=[multiply, add],
    llm=llm,
    system_prompt="You are a helpful assistant that can multiply and add numbers.",
)


# Use agent: `agent.run` is asynchronous, so it is awaited inside a
# coroutine that is driven by an event loop.
async def run_calculator_agent():
    return await agent.run(user_msg="What is 25 * 17 + 142?")


response = asyncio.run(run_calculator_agent())
print(response)
import asyncio

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import QueryEngineTool

# Create index as before
index = VectorStoreIndex.from_documents(documents)

# Wrap query engine as tool
query_tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(),
    name="python_docs",
    description="Useful for answering questions about Python programming",
)

# Agent with document search + calculator, built through the constructor
agent = FunctionAgent(
    tools=[query_tool, multiply, add],
    llm=llm,
)


# Agent decides when to search docs vs calculate. `agent.run` is
# asynchronous, so it is awaited inside a coroutine.
async def run_docs_agent():
    return await agent.run(
        user_msg="According to the docs, what is Python used for?"
    )


response = asyncio.run(run_docs_agent())
from llama_index.core.chat_engine import CondensePlusContextChatEngine
# 带记忆的聊天
chat_engine = index.as_chat_engine(
chat_mode="condense_plus_context", # 或 "context", "react"
verbose=True
)
# 多轮对话
response1 = chat_engine.chat("What is Python?")
response2 = chat_engine.chat("Can you give examples?") # 记住上下文
response3 = chat_engine.chat("What about web frameworks?")
from llama_index.core.vector_stores import MetadataFilters, ExactMatchFilter
# 按元数据过滤
filters = MetadataFilters(
filters=[
ExactMatchFilter(key="category", value="tutorial"),
ExactMatchFilter(key="difficulty", value="beginner")
]
)
retriever = index.as_retriever(
similarity_top_k=3,
filters=filters
)
query_engine = index.as_query_engine(filters=filters)
from pydantic import BaseModel
from llama_index.core.output_parsers import PydanticOutputParser
class Summary(BaseModel):
    # Schema the query engine is asked to fill in for the answer.
    title: str
    main_points: list[str]
    conclusion: str


# Get structured response: pass the model as `output_cls` so the answer is
# parsed into a Summary instance.
query_engine = index.as_query_engine(
    output_cls=Summary,
    response_mode="compact",
)
response = query_engine.query("Summarize the document")
summary = response.response  # Pydantic model (Summary)
print(summary.title, summary.main_points)
# 加载所有支持的格式
documents = SimpleDirectoryReader(
"./data",
recursive=True,
required_exts=[".pdf", ".docx", ".txt", ".md"]
).load_data()
from llama_index.readers.web import BeautifulSoupWebReader
reader = BeautifulSoupWebReader()
documents = reader.load_data(urls=[
"https://docs.python.org/3/tutorial/",
"https://docs.python.org/3/library/"
])
from llama_index.readers.database import DatabaseReader
# The connection string is passed through the `uri` keyword.
reader = DatabaseReader(
    uri="postgresql://user:pass@localhost/db",
)
documents = reader.load_data(query="SELECT * FROM articles")
from llama_index.readers.json import JSONReader
import tempfile
import urllib.request
from pathlib import Path

reader = JSONReader()

# JSONReader loads from a local file path, so the remote JSON payload is
# downloaded into a temporary file before it is parsed.
with tempfile.TemporaryDirectory() as tmp_dir:
    json_path = Path(tmp_dir) / "data.json"
    with urllib.request.urlopen("https://api.example.com/data.json") as http_response:
        json_path.write_bytes(http_response.read())
    documents = reader.load_data(input_file=str(json_path))
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
# 初始化 Chroma
db = chromadb.PersistentClient(path="./chroma_db")
collection = db.get_or_create_collection("my_collection")
# 创建向量存储
vector_store = ChromaVectorStore(chroma_collection=collection)
# 在索引中使用
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

# Initialize Pinecone: current clients are configured through a Pinecone
# instance instead of the module-level `pinecone.init()` call.
pc = Pinecone(api_key="your-key")
pinecone_index = pc.Index("my-index")

# Create vector store
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
# 创建 FAISS 索引
d = 1536 # 嵌入维度
faiss_index = faiss.IndexFlatL2(d)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.llms.anthropic import Anthropic
from llama_index.core import Settings
# 设置全局 LLM
Settings.llm = Anthropic(model="claude-sonnet-4-5-20250929")
# 现在所有查询都使用 Anthropic
query_engine = index.as_query_engine()
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# 使用 HuggingFace 嵌入
Settings.embed_model = HuggingFaceEmbedding(
model_name="sentence-transformers/all-mpnet-base-v2"
)
index = VectorStoreIndex.from_documents(documents)
from llama_index.core import PromptTemplate
qa_prompt = PromptTemplate(
"Context: {context_str}\n"
"Question: {query_str}\n"
"Answer the question based only on the context. "
"If the answer is not in the context, say 'I don't know'.\n"
"Answer: "
)
query_engine = index.as_query_engine(text_qa_template=qa_prompt)
from llama_index.core import SimpleDirectoryReader
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
# 加载图像和文档
documents = SimpleDirectoryReader(
"./data",
required_exts=[".jpg", ".png", ".pdf"]
).load_data()
# 多模态索引
index = VectorStoreIndex.from_documents(documents)
# 使用多模态 LLM 查询
multi_modal_llm = OpenAIMultiModal(model="gpt-4o")
query_engine = index.as_query_engine(llm=multi_modal_llm)
response = query_engine.query("What is in the diagram on page 3?")
from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator
# 评估相关性
relevancy = RelevancyEvaluator()
result = relevancy.evaluate_response(
query="What is Python?",
response=response
)
print(f"Relevancy: {result.passing}")
# 评估忠实度(无幻觉)
faithfulness = FaithfulnessEvaluator()
result = faithfulness.evaluate_response(
query="What is Python?",
response=response
)
print(f"Faithfulness: {result.passing}")
# 完整的 RAG 管道
documents = SimpleDirectoryReader("docs").load_data()
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./storage")
# 查询
query_engine = index.as_query_engine(
similarity_top_k=3,
response_mode="compact",
verbose=True
)
response = query_engine.query("What is the main topic?")
print(response)
print(f"Sources: {[node.metadata['file_name'] for node in response.source_nodes]}")
# 对话式界面
chat_engine = index.as_chat_engine(
chat_mode="condense_plus_context",
verbose=True
)
# 多轮聊天
while True:
user_input = input("You: ")
if user_input.lower() == "quit":
break
response = chat_engine.chat(user_input)
print(f"Bot: {response}")
| 操作 | 延迟 | 备注 |
|---|---|---|
| 索引100个文档 | ~10-30秒 | 一次性操作,可持久化 |
| 查询(向量) | ~0.5-2秒 | 检索 + LLM |
| 流式查询 | ~0.5秒首个词元 | 用户体验更佳 |
| 带工具的智能体 | ~3-8秒 | 多个工具调用 |
| 功能 | LlamaIndex | LangChain |
|---|---|---|
| 最适合 | RAG, 文档问答 | 智能体, 通用LLM应用 |
| 数据连接器 | 300+ (LlamaHub) | 100+ |
| RAG专注度 | 核心功能 | 众多功能之一 |
| 学习曲线 | RAG方面更简单 | 更陡峭 |
| 自定义性 | 高 | 非常高 |
| 文档 | 优秀 | 良好 |
在以下场景使用 LlamaIndex:
在以下场景使用 LangChain:
每周安装量
231
代码仓库
GitHub Stars
22.6K
首次出现
Jan 21, 2026
安全审计
安装于
opencode: 185
gemini-cli: 177
claude-code: 167
cursor: 165
codex: 162
github-copilot: 152
The leading framework for connecting LLMs with your data.
Use LlamaIndex when:
Metrics :
Use alternatives instead :
# Starter package (recommended)
pip install llama-index
# Or minimal core + specific integrations
pip install llama-index-core
pip install llama-index-llms-openai
pip install llama-index-embeddings-openai
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# Load documents
documents = SimpleDirectoryReader("data").load_data()
# Create index
index = VectorStoreIndex.from_documents(documents)
# Query
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
import os

from llama_index.core import SimpleDirectoryReader, Document
from llama_index.readers.web import SimpleWebPageReader
from llama_index.readers.github import GithubClient, GithubRepositoryReader

# Directory of files
documents = SimpleDirectoryReader("./data").load_data()

# Web pages
reader = SimpleWebPageReader()
documents = reader.load_data(["https://example.com"])

# GitHub repository: the reader needs a GithubClient to talk to the API.
# The access token is read from the GITHUB_TOKEN environment variable.
github_client = GithubClient(github_token=os.environ["GITHUB_TOKEN"])
reader = GithubRepositoryReader(
    github_client=github_client,
    owner="user",
    repo="repo",
)
documents = reader.load_data(branch="main")

# Manual document creation
doc = Document(
    text="This is the document content",
    metadata={"source": "manual", "date": "2025-01-01"},
)
from llama_index.core import VectorStoreIndex, ListIndex, TreeIndex
# Vector index (most common - semantic search)
vector_index = VectorStoreIndex.from_documents(documents)
# List index (sequential scan)
list_index = ListIndex.from_documents(documents)
# Tree index (hierarchical summary)
tree_index = TreeIndex.from_documents(documents)
# Save index
index.storage_context.persist(persist_dir="./storage")
# Load index
from llama_index.core import load_index_from_storage, StorageContext
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
# Basic query
query_engine = index.as_query_engine()
response = query_engine.query("What is the main topic?")
print(response)
# Streaming response
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain quantum computing")
for text in response.response_gen:
print(text, end="", flush=True)
# Custom configuration
query_engine = index.as_query_engine(
similarity_top_k=3, # Return top 3 chunks
response_mode="compact", # Or "tree_summarize", "simple_summarize"
verbose=True
)
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters

# Vector retriever
retriever = index.as_retriever(similarity_top_k=5)
nodes = retriever.retrieve("machine learning")

# With filtering: `filters` takes a MetadataFilters object rather than a
# plain dict; each filter key is the metadata field name.
category_filter = MetadataFilters(
    filters=[ExactMatchFilter(key="category", value="tutorial")]
)
retriever = index.as_retriever(
    similarity_top_k=3,
    filters=category_filter,
)
# Custom retriever
from llama_index.core.retrievers import BaseRetriever


class CustomRetriever(BaseRetriever):
    """Skeleton for a retriever with user-defined retrieval logic."""

    def _retrieve(self, query_bundle):
        # Your custom retrieval logic: fill `nodes` with the results for
        # `query_bundle`. It is bound locally so the method does not depend
        # on a module-level variable of the same name.
        nodes = []
        return nodes
import asyncio

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI


# Define tools
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


# Create agent: FunctionAgent is configured through its constructor and
# accepts plain Python functions as tools.
llm = OpenAI(model="gpt-4o")
agent = FunctionAgent(
    tools=[multiply, add],
    llm=llm,
    system_prompt="You are a helpful assistant that can multiply and add numbers.",
)


# Use agent: `agent.run` is asynchronous, so it is awaited inside a
# coroutine that is driven by an event loop.
async def run_calculator_agent():
    return await agent.run(user_msg="What is 25 * 17 + 142?")


response = asyncio.run(run_calculator_agent())
print(response)
import asyncio

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import QueryEngineTool

# Create index as before
index = VectorStoreIndex.from_documents(documents)

# Wrap query engine as tool
query_tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(),
    name="python_docs",
    description="Useful for answering questions about Python programming",
)

# Agent with document search + calculator, built through the constructor
agent = FunctionAgent(
    tools=[query_tool, multiply, add],
    llm=llm,
)


# Agent decides when to search docs vs calculate. `agent.run` is
# asynchronous, so it is awaited inside a coroutine.
async def run_docs_agent():
    return await agent.run(
        user_msg="According to the docs, what is Python used for?"
    )


response = asyncio.run(run_docs_agent())
from llama_index.core.chat_engine import CondensePlusContextChatEngine
# Chat with memory
chat_engine = index.as_chat_engine(
chat_mode="condense_plus_context", # Or "context", "react"
verbose=True
)
# Multi-turn conversation
response1 = chat_engine.chat("What is Python?")
response2 = chat_engine.chat("Can you give examples?") # Remembers context
response3 = chat_engine.chat("What about web frameworks?")
from llama_index.core.vector_stores import MetadataFilters, ExactMatchFilter
# Filter by metadata
filters = MetadataFilters(
filters=[
ExactMatchFilter(key="category", value="tutorial"),
ExactMatchFilter(key="difficulty", value="beginner")
]
)
retriever = index.as_retriever(
similarity_top_k=3,
filters=filters
)
query_engine = index.as_query_engine(filters=filters)
from pydantic import BaseModel
from llama_index.core.output_parsers import PydanticOutputParser
class Summary(BaseModel):
    # Schema the query engine is asked to fill in for the answer.
    title: str
    main_points: list[str]
    conclusion: str


# Get structured response: pass the model as `output_cls` so the answer is
# parsed into a Summary instance.
query_engine = index.as_query_engine(
    output_cls=Summary,
    response_mode="compact",
)
response = query_engine.query("Summarize the document")
summary = response.response  # Pydantic model (Summary)
print(summary.title, summary.main_points)
# Load all supported formats
documents = SimpleDirectoryReader(
"./data",
recursive=True,
required_exts=[".pdf", ".docx", ".txt", ".md"]
).load_data()
from llama_index.readers.web import BeautifulSoupWebReader
reader = BeautifulSoupWebReader()
documents = reader.load_data(urls=[
"https://docs.python.org/3/tutorial/",
"https://docs.python.org/3/library/"
])
from llama_index.readers.database import DatabaseReader
# The connection string is passed through the `uri` keyword.
reader = DatabaseReader(
    uri="postgresql://user:pass@localhost/db",
)
documents = reader.load_data(query="SELECT * FROM articles")
from llama_index.readers.json import JSONReader
import tempfile
import urllib.request
from pathlib import Path

reader = JSONReader()

# JSONReader loads from a local file path, so the remote JSON payload is
# downloaded into a temporary file before it is parsed.
with tempfile.TemporaryDirectory() as tmp_dir:
    json_path = Path(tmp_dir) / "data.json"
    with urllib.request.urlopen("https://api.example.com/data.json") as http_response:
        json_path.write_bytes(http_response.read())
    documents = reader.load_data(input_file=str(json_path))
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
# Initialize Chroma
db = chromadb.PersistentClient(path="./chroma_db")
collection = db.get_or_create_collection("my_collection")
# Create vector store
vector_store = ChromaVectorStore(chroma_collection=collection)
# Use in index
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

# Initialize Pinecone: current clients are configured through a Pinecone
# instance instead of the module-level `pinecone.init()` call.
pc = Pinecone(api_key="your-key")
pinecone_index = pc.Index("my-index")

# Create vector store
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
# Create FAISS index
d = 1536 # Dimension of embeddings
faiss_index = faiss.IndexFlatL2(d)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
from llama_index.llms.anthropic import Anthropic
from llama_index.core import Settings
# Set global LLM
Settings.llm = Anthropic(model="claude-sonnet-4-5-20250929")
# Now all queries use Anthropic
query_engine = index.as_query_engine()
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Use HuggingFace embeddings
Settings.embed_model = HuggingFaceEmbedding(
model_name="sentence-transformers/all-mpnet-base-v2"
)
index = VectorStoreIndex.from_documents(documents)
from llama_index.core import PromptTemplate
qa_prompt = PromptTemplate(
"Context: {context_str}\n"
"Question: {query_str}\n"
"Answer the question based only on the context. "
"If the answer is not in the context, say 'I don't know'.\n"
"Answer: "
)
query_engine = index.as_query_engine(text_qa_template=qa_prompt)
from llama_index.core import SimpleDirectoryReader
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
# Load images and documents
documents = SimpleDirectoryReader(
"./data",
required_exts=[".jpg", ".png", ".pdf"]
).load_data()
# Multi-modal index
index = VectorStoreIndex.from_documents(documents)
# Query with multi-modal LLM
multi_modal_llm = OpenAIMultiModal(model="gpt-4o")
query_engine = index.as_query_engine(llm=multi_modal_llm)
response = query_engine.query("What is in the diagram on page 3?")
from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator
# Evaluate relevance
relevancy = RelevancyEvaluator()
result = relevancy.evaluate_response(
query="What is Python?",
response=response
)
print(f"Relevancy: {result.passing}")
# Evaluate faithfulness (no hallucination)
faithfulness = FaithfulnessEvaluator()
result = faithfulness.evaluate_response(
query="What is Python?",
response=response
)
print(f"Faithfulness: {result.passing}")
# Complete RAG pipeline
documents = SimpleDirectoryReader("docs").load_data()
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./storage")
# Query
query_engine = index.as_query_engine(
similarity_top_k=3,
response_mode="compact",
verbose=True
)
response = query_engine.query("What is the main topic?")
print(response)
print(f"Sources: {[node.metadata['file_name'] for node in response.source_nodes]}")
# Conversational interface
chat_engine = index.as_chat_engine(
chat_mode="condense_plus_context",
verbose=True
)
# Multi-turn chat
while True:
user_input = input("You: ")
if user_input.lower() == "quit":
break
response = chat_engine.chat(user_input)
print(f"Bot: {response}")
| Operation | Latency | Notes |
|---|---|---|
| Index 100 docs | ~10-30s | One-time, can persist |
| Query (vector) | ~0.5-2s | Retrieval + LLM |
| Streaming query | ~0.5s first token | Better UX |
| Agent with tools | ~3-8s | Multiple tool calls |
| Feature | LlamaIndex | LangChain |
|---|---|---|
| Best for | RAG, document Q&A | Agents, general LLM apps |
| Data connectors | 300+ (LlamaHub) | 100+ |
| RAG focus | Core feature | One of many |
| Learning curve | Easier for RAG | Steeper |
| Customization | High | Very high |
| Documentation | Excellent | Good |
Use LlamaIndex when:
Use LangChain when:
Weekly Installs
231
Repository
GitHub Stars
22.6K
First Seen
Jan 21, 2026
Security Audits
Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Warn
Installed on
opencode: 185
gemini-cli: 177
claude-code: 167
cursor: 165
codex: 162
github-copilot: 152
超能力技能使用指南:AI助手技能调用优先级与工作流程详解
41,800 周安装