GraphRAG 模式详解：知识图谱与RAG结合实现关系感知检索与推理

graphrag-patterns by latestaiagents/agent-skills

74 周安装量

2 GitHub Stars

GitHub

安装命令

npx skills add https://github.com/latestaiagents/agent-skills --skill graphrag-patterns

AI/机器学习 · 知识管理 · 自然语言处理

🇨🇳中文介绍

GraphRAG 模式

将知识图谱与 RAG 结合，实现关系感知的检索和推理。

使用场景

数据具有丰富的实体关系
问题涉及关联性（"X 与 Y 有何关联？"）
需要进行跨文档的多跳推理
基于结构化 + 非结构化数据构建
需要可解释的检索路径

GraphRAG 架构

┌──────────────────────────────────────────────────────────┐
│                    文档                                   │
└─────────────────────────┬────────────────────────────────┘
                          │
          ┌───────────────┼───────────────┐
          │               │               │
          ▼               ▼               ▼
   ┌────────────┐  ┌────────────┐  ┌────────────┐
   │   实体      │  │   向量     │  │    文本    │
   │   抽取      │  │   嵌入     │  │    分块    │
   └─────┬──────┘  └─────┬──────┘  └─────┬──────┘
         │               │               │
         ▼               │               │
   ┌────────────┐        │               │
   │   知识     │        │               │
   │    图谱    │        │               │
   └─────┬──────┘        │               │
         │               │               │
         └───────────────┼───────────────┘
                         │
                         ▼
              ┌─────────────────────┐
              │     混合索引        │
              │ （图谱 + 向量）     │
              └──────────┬──────────┘
                         │
                         ▼
              ┌─────────────────────┐
              │   图谱感知 RAG      │
              └─────────────────────┘

广告位招租

在这里展示您的产品或服务

触达数万 AI 开发者，精准高效

联系我们

构建知识图谱

实体与关系抽取

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Prompt template used by extract_graph_elements. `{text}` is the single
# template variable; the doubled braces `{{ }}` are escaped literal braces so
# the JSON skeleton survives template formatting.
EXTRACTION_PROMPT = """从文本中抽取实体和关系。

文本: {text}

返回 JSON:
{{
  "entities": [
    {{"name": "...", "type": "PERSON|ORG|PRODUCT|CONCEPT|...", "description": "..."}}
  ],
  "relationships": [
    {{"source": "...", "target": "...", "type": "WORKS_FOR|USES|RELATED_TO|...", "description": "..."}}
  ]
}}
"""

def extract_graph_elements(text: str) -> dict:
    """Ask the LLM to extract entities and relationships from ``text``.

    Args:
        text: Raw passage to analyse; substituted into ``EXTRACTION_PROMPT``.

    Returns:
        The model's reply parsed as JSON. The prompt asks for an object with
        ``"entities"`` and ``"relationships"`` lists, but the reply is not
        validated against that shape here.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Imported locally: this snippet calls ``json.loads`` without ever
    # importing ``json``, which raises NameError unless some other part of the
    # file happens to import it.
    import json

    llm = ChatOpenAI(model="gpt-4", temperature=0)
    chain = ChatPromptTemplate.from_template(EXTRACTION_PROMPT) | llm
    reply = chain.invoke({"text": text})
    return json.loads(reply.content)

from neo4j import GraphDatabase

class GraphStore:
    """Thin wrapper around a Neo4j driver for storing an entity graph.

    Entities are ``:Entity`` nodes keyed by ``name``; relationships are
    ``:RELATES`` edges whose semantic kind is kept in the ``type`` property.
    """

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for ``uri`` using ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver once the store is no longer needed."""
        self.driver.close()

    def add_entity(self, entity: dict):
        """Create or update the ``:Entity`` node named ``entity["name"]``.

        Args:
            entity: Mapping with ``"name"``, ``"type"`` and ``"description"``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        with self.driver.session() as session:
            session.run("""
                MERGE (e:Entity {name: $name})
                SET e.type = $type, e.description = $description
                """,
                name=entity["name"],
                type=entity["type"],
                description=entity["description"]
            )

    def add_relationship(self, rel: dict):
        """Create or update a ``:RELATES`` edge between two existing entities.

        Both endpoints are looked up with ``MATCH``, so nothing is written
        when either entity has not been added yet.

        Args:
            rel: Mapping with ``"source"``, ``"target"``, ``"type"`` and
                ``"description"``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (a:Entity {name: $source})
                MATCH (b:Entity {name: $target})
                MERGE (a)-[r:RELATES {type: $type}]->(b)
                SET r.description = $description
                """,
                source=rel["source"],
                target=rel["target"],
                type=rel["type"],
                description=rel["description"]
            )

    def get_neighbors(self, entity: str, hops: int = 2) -> list:
        """Return every path of 1..``hops`` relationships starting at ``entity``.

        Args:
            entity: ``name`` of the starting ``:Entity`` node.
            hops: Maximum path length; must be a positive integer.

        Returns:
            The ``path`` value of each result record, in result order.
            Relationship direction is ignored by the pattern.

        Raises:
            ValueError: If ``hops`` is not a positive integer.
        """
        # Cypher does not accept a query parameter as a variable-length bound
        # (``[*1..$hops]`` is rejected by the parser), so the bound is checked
        # to be a plain positive int and embedded in the query text. ``name``
        # remains a bound parameter.
        if isinstance(hops, bool) or not isinstance(hops, int) or hops < 1:
            raise ValueError(f"hops must be a positive integer, got {hops!r}")
        cypher = f"""
                MATCH path = (e:Entity {{name: $name}})-[*1..{hops}]-(related)
                RETURN path
                """
        with self.driver.session() as session:
            result = session.run(cypher, name=entity)
            return [record["path"] for record in result]

GraphRAG 检索策略

1. 以实体为中心的检索

def entity_centric_retrieve(query: str, graph: GraphStore, vectorstore) -> list:
    """Retrieve chunks for the query's entities and their graph neighbours.

    Args:
        query: User question; entities are pulled from it with
            ``extract_entities``.
        graph: Store whose ``get_neighbors`` supplies paths around each entity.
        vectorstore: Object exposing ``similarity_search(text, k=..., filter=...)``.

    Returns:
        Whatever ``deduplicate`` returns for the collected chunks.
    """
    entities = extract_entities(query)

    # ``get_neighbors`` yields path objects rather than names. Collect the
    # ``name`` of every node on each path so the set stays a set of strings
    # usable both as search text and in the metadata filter below.
    expanded_entities = set(entities)
    for entity in entities:
        for path in graph.get_neighbors(entity, hops=2):
            expanded_entities.update(node["name"] for node in path.nodes)

    # Up to three chunks per entity, restricted to chunks tagged with it.
    chunks = []
    for entity in expanded_entities:
        chunks.extend(
            vectorstore.similarity_search(
                entity,
                k=3,
                filter={"entities": {"$contains": entity}},
            )
        )

    return deduplicate(chunks)

2. 基于路径的检索

def path_retrieve(query: str, entity_a: str, entity_b: str, graph: "GraphStore") -> list:
    """Find the shortest path between two entities and render it as text.

    Args:
        query: The user's question. Not used by the lookup; kept so the
            signature stays compatible with existing callers.
        entity_a: ``name`` of the first ``:Entity`` node.
        entity_b: ``name`` of the second ``:Entity`` node.
        graph: Store whose ``driver`` is used to run the Cypher query.

    Returns:
        A list of strings such as ``"A -> B -> C"``, one per returned path
        (node names joined in path order). Empty when the query yields no
        path within five hops.
    """
    with graph.driver.session() as session:
        result = session.run("""
            MATCH path = shortestPath(
                (a:Entity {name: $entity_a})-[*..5]-(b:Entity {name: $entity_b})
            )
            RETURN path, length(path) as hops
            ORDER BY hops
            LIMIT 5
            """,
            entity_a=entity_a, entity_b=entity_b
        )
        # Records are consumed while the session is still open.
        return [
            " -> ".join(node["name"] for node in record["path"].nodes)
            for record in result
        ]

3. 基于社区的检索（Microsoft GraphRAG）

from graspologic.partition import hierarchical_leiden

def build_communities(graph: GraphStore) -> dict:
    """Detect communities and produce one summary per community.

    Args:
        graph: Store that must provide ``to_networkx()`` and ``get_entity()``.
            NOTE(review): the ``GraphStore`` class shown in this document
            defines neither method — confirm they exist where this is used.

    Returns:
        Mapping of community (cluster) id to the text returned by
        ``summarize_community`` for that community's member descriptions.
    """
    nx_graph = graph.to_networkx()

    # ``hierarchical_leiden`` returns a flat sequence of per-node records with
    # ``node`` and ``cluster`` attributes, not a mapping, so calling
    # ``.items()`` on it fails. Group node ids by cluster id first.
    # NOTE(review): assumes cluster ids are unique across hierarchy levels —
    # confirm against the graspologic documentation.
    members_by_community = {}
    for assignment in hierarchical_leiden(nx_graph, max_cluster_size=10):
        members_by_community.setdefault(assignment.cluster, []).append(assignment.node)

    community_summaries = {}
    for community_id, members in members_by_community.items():
        member_descriptions = [graph.get_entity(m)["description"] for m in members]
        community_summaries[community_id] = summarize_community(member_descriptions)

    return community_summaries

def community_retrieve(query: str, community_summaries: dict, vectorstore=None) -> list:
    """Search community summaries first, then drill down to their entities.

    Args:
        query: User question used for the similarity search.
        community_summaries: Not read by this function; kept so the signature
            stays compatible with existing callers.
        vectorstore: Object exposing ``similarity_search(text, k=..., filter=...)``.
            When omitted, the module-level name ``vectorstore`` is used, which
            is what the function implicitly relied on before.

    Returns:
        Whatever ``retrieve_by_entities`` returns for the members of the
        three most similar community summaries.

    Raises:
        NameError: If no vector store is passed and none is defined at module
            level.
    """
    store = vectorstore
    if store is None:
        # The parameter shadows the global of the same name, so the fallback
        # has to go through globals().
        try:
            store = globals()["vectorstore"]
        except KeyError:
            raise NameError(
                "community_retrieve needs a vector store: pass vectorstore=... "
                "or define a module-level 'vectorstore'"
            ) from None

    relevant = store.similarity_search(
        query,
        k=3,
        filter={"type": "community_summary"}
    )

    # Each hit is expected to list its member entities in its metadata.
    entities = []
    for community in relevant:
        entities.extend(community.metadata["members"])

    return retrieve_by_entities(entities)

LangChain + Neo4j 集成

from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain

# Connect to Neo4j.
# NOTE(review): the URL and credentials below are hard-coded example values —
# load real ones from configuration or the environment outside a demo.
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="password"
)

# Build a chain that turns a natural-language question into a Cypher query.
# return_intermediate_steps=True asks the chain to include its intermediate
# steps in the result alongside the answer.
# NOTE(review): recent langchain releases reportedly require an explicit
# opt-in flag (allow_dangerous_requests=True) for this chain — confirm against
# the installed version.
chain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(model="gpt-4"),
    graph=graph,
    verbose=True,
    return_intermediate_steps=True
)

# Ask a question in natural language.
result = chain.invoke({
    "query": "有哪些工程师在参与 Atlas 项目？"
})
# Example of the Cypher the chain is expected to generate: MATCH (p:Person)-[:WORKS_ON]->(proj:Project {name: 'Atlas'}) RETURN p

图谱 + 向量混合管道

class GraphRAG:
    """Hybrid pipeline: vector search for chunks, graph expansion for relations."""

    def __init__(self, graph: "GraphStore", vectorstore, llm):
        """Store the collaborators; nothing is queried at construction time.

        Args:
            graph: Object exposing ``get_neighbors(entity, hops=...)`` that
                returns path objects with ``nodes`` and ``relationships``.
            vectorstore: Object exposing ``similarity_search(query, k=...)``.
            llm: Object exposing ``invoke(prompt)`` whose result has ``content``.
        """
        self.graph = graph
        self.vectorstore = vectorstore
        self.llm = llm

    def retrieve(self, query: str) -> dict:
        """Collect text chunks and relationship triples relevant to ``query``.

        Returns:
            A dict with ``"chunks"`` (the ten nearest vector hits) and
            ``"graph_context"`` (strings of the form
            ``"source -> relationship type -> target"``).
        """
        # 1. Vector search for the initial chunks.
        vector_results = self.vectorstore.similarity_search(query, k=10)

        # 2. Entities tagged on those chunks.
        entities = set()
        for doc in vector_results:
            entities.update(doc.metadata.get("entities", []))

        # 3. Expand at most five entities through the graph. Sorting makes the
        #    chosen five deterministic instead of depending on set order.
        graph_context = []
        for entity in sorted(entities, key=str)[:5]:
            for path in self.graph.get_neighbors(entity, hops=1):
                # get_neighbors returns paths, not dicts: walk each hop and
                # read the relationship kind from the edge's ``type`` property.
                nodes = path.nodes
                for source, rel, target in zip(nodes, path.relationships, nodes[1:]):
                    graph_context.append(
                        f"{source['name']} -> {rel['type']} -> {target['name']}"
                    )

        # 4. Hand both contexts back together.
        return {
            "chunks": vector_results,
            "graph_context": graph_context
        }

    def _format_chunks(self, chunks) -> str:
        """Join chunk texts with blank lines (``str(chunk)`` if no ``page_content``)."""
        return "\n\n".join(getattr(chunk, "page_content", str(chunk)) for chunk in chunks)

    def generate(self, query: str, context: dict) -> str:
        """Answer ``query`` with the LLM, grounded in ``context`` from ``retrieve``.

        Args:
            query: The user's question.
            context: Dict with ``"chunks"`` and ``"graph_context"`` keys.

        Returns:
            The ``content`` of the LLM's reply.
        """
        prompt = f"""基于上下文回答问题。

        文本块:
        {self._format_chunks(context['chunks'])}

        实体关系:
        {chr(10).join(context['graph_context'])}

        问题: {query}
        """
        return self.llm.invoke(prompt).content

最佳实践

保持抽取一致性 - 对所有文档使用相同的 LLM/提示词
规范化实体 - "AWS"、"Amazon Web Services" → 指向同一节点
限制图谱深度 - 通常 2-3 跳就足够了
缓存遍历结果 - 图谱查询可能开销较大
与向量结合 - 仅用图谱会遗漏语义相似性
版本化你的模式 - 实体/关系类型会不断演进

🇺🇸English

GraphRAG Patterns

Combine knowledge graphs with RAG for relationship-aware retrieval and reasoning.

When to Use

Data has rich entity relationships
Questions involve connections ("How is X related to Y?")
Need multi-hop reasoning across documents
Building over structured + unstructured data
Want explainable retrieval paths

GraphRAG Architecture

┌──────────────────────────────────────────────────────────┐
│                    Documents                              │
└─────────────────────────┬────────────────────────────────┘
                          │
          ┌───────────────┼───────────────┐
          │               │               │
          ▼               ▼               ▼
   ┌────────────┐  ┌────────────┐  ┌────────────┐
   │   Entity   │  │   Vector   │  │    Text    │
   │ Extraction │  │ Embeddings │  │   Chunks   │
   └─────┬──────┘  └─────┬──────┘  └─────┬──────┘
         │               │               │
         ▼               │               │
   ┌────────────┐        │               │
   │  Knowledge │        │               │
   │    Graph   │        │               │
   └─────┬──────┘        │               │
         │               │               │
         └───────────────┼───────────────┘
                         │
                         ▼
              ┌─────────────────────┐
              │    Hybrid Index     │
              │ (Graph + Vectors)   │
              └──────────┬──────────┘
                         │
                         ▼
              ┌─────────────────────┐
              │   Graph-Aware RAG   │
              └─────────────────────┘

Building the Knowledge Graph

Entity & Relationship Extraction

from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Prompt template used by extract_graph_elements. `{text}` is the single
# template variable; the doubled braces `{{ }}` are escaped literal braces so
# the JSON skeleton survives template formatting.
EXTRACTION_PROMPT = """Extract entities and relationships from the text.

Text: {text}

Return JSON:
{{
  "entities": [
    {{"name": "...", "type": "PERSON|ORG|PRODUCT|CONCEPT|...", "description": "..."}}
  ],
  "relationships": [
    {{"source": "...", "target": "...", "type": "WORKS_FOR|USES|RELATED_TO|...", "description": "..."}}
  ]
}}
"""

def extract_graph_elements(text: str) -> dict:
    """Ask the LLM to extract entities and relationships from ``text``.

    Args:
        text: Raw passage to analyse; substituted into ``EXTRACTION_PROMPT``.

    Returns:
        The model's reply parsed as JSON. The prompt asks for an object with
        ``"entities"`` and ``"relationships"`` lists, but the reply is not
        validated against that shape here.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Imported locally: this snippet calls ``json.loads`` without ever
    # importing ``json``, which raises NameError unless some other part of the
    # file happens to import it.
    import json

    llm = ChatOpenAI(model="gpt-4", temperature=0)
    chain = ChatPromptTemplate.from_template(EXTRACTION_PROMPT) | llm
    reply = chain.invoke({"text": text})
    return json.loads(reply.content)

Store in Neo4j

from neo4j import GraphDatabase

class GraphStore:
    """Thin wrapper around a Neo4j driver for storing an entity graph.

    Entities are ``:Entity`` nodes keyed by ``name``; relationships are
    ``:RELATES`` edges whose semantic kind is kept in the ``type`` property.
    """

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for ``uri`` using ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver once the store is no longer needed."""
        self.driver.close()

    def add_entity(self, entity: dict):
        """Create or update the ``:Entity`` node named ``entity["name"]``.

        Args:
            entity: Mapping with ``"name"``, ``"type"`` and ``"description"``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        with self.driver.session() as session:
            session.run("""
                MERGE (e:Entity {name: $name})
                SET e.type = $type, e.description = $description
                """,
                name=entity["name"],
                type=entity["type"],
                description=entity["description"]
            )

    def add_relationship(self, rel: dict):
        """Create or update a ``:RELATES`` edge between two existing entities.

        Both endpoints are looked up with ``MATCH``, so nothing is written
        when either entity has not been added yet.

        Args:
            rel: Mapping with ``"source"``, ``"target"``, ``"type"`` and
                ``"description"``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (a:Entity {name: $source})
                MATCH (b:Entity {name: $target})
                MERGE (a)-[r:RELATES {type: $type}]->(b)
                SET r.description = $description
                """,
                source=rel["source"],
                target=rel["target"],
                type=rel["type"],
                description=rel["description"]
            )

    def get_neighbors(self, entity: str, hops: int = 2) -> list:
        """Return every path of 1..``hops`` relationships starting at ``entity``.

        Args:
            entity: ``name`` of the starting ``:Entity`` node.
            hops: Maximum path length; must be a positive integer.

        Returns:
            The ``path`` value of each result record, in result order.
            Relationship direction is ignored by the pattern.

        Raises:
            ValueError: If ``hops`` is not a positive integer.
        """
        # Cypher does not accept a query parameter as a variable-length bound
        # (``[*1..$hops]`` is rejected by the parser), so the bound is checked
        # to be a plain positive int and embedded in the query text. ``name``
        # remains a bound parameter.
        if isinstance(hops, bool) or not isinstance(hops, int) or hops < 1:
            raise ValueError(f"hops must be a positive integer, got {hops!r}")
        cypher = f"""
                MATCH path = (e:Entity {{name: $name}})-[*1..{hops}]-(related)
                RETURN path
                """
        with self.driver.session() as session:
            result = session.run(cypher, name=entity)
            return [record["path"] for record in result]

GraphRAG Retrieval Strategies

1. Entity-Centric Retrieval

def entity_centric_retrieve(query: str, graph: GraphStore, vectorstore) -> list:
    """Retrieve chunks for the query's entities and their graph neighbours.

    Args:
        query: User question; entities are pulled from it with
            ``extract_entities``.
        graph: Store whose ``get_neighbors`` supplies paths around each entity.
        vectorstore: Object exposing ``similarity_search(text, k=..., filter=...)``.

    Returns:
        Whatever ``deduplicate`` returns for the collected chunks.
    """
    entities = extract_entities(query)

    # ``get_neighbors`` yields path objects rather than names. Collect the
    # ``name`` of every node on each path so the set stays a set of strings
    # usable both as search text and in the metadata filter below.
    expanded_entities = set(entities)
    for entity in entities:
        for path in graph.get_neighbors(entity, hops=2):
            expanded_entities.update(node["name"] for node in path.nodes)

    # Up to three chunks per entity, restricted to chunks tagged with it.
    chunks = []
    for entity in expanded_entities:
        chunks.extend(
            vectorstore.similarity_search(
                entity,
                k=3,
                filter={"entities": {"$contains": entity}},
            )
        )

    return deduplicate(chunks)

2. Path-Based Retrieval

def path_retrieve(query: str, entity_a: str, entity_b: str, graph: "GraphStore") -> list:
    """Find the shortest path between two entities and render it as text.

    Args:
        query: The user's question. Not used by the lookup; kept so the
            signature stays compatible with existing callers.
        entity_a: ``name`` of the first ``:Entity`` node.
        entity_b: ``name`` of the second ``:Entity`` node.
        graph: Store whose ``driver`` is used to run the Cypher query.

    Returns:
        A list of strings such as ``"A -> B -> C"``, one per returned path
        (node names joined in path order). Empty when the query yields no
        path within five hops.
    """
    with graph.driver.session() as session:
        result = session.run("""
            MATCH path = shortestPath(
                (a:Entity {name: $entity_a})-[*..5]-(b:Entity {name: $entity_b})
            )
            RETURN path, length(path) as hops
            ORDER BY hops
            LIMIT 5
            """,
            entity_a=entity_a, entity_b=entity_b
        )
        # Records are consumed while the session is still open.
        return [
            " -> ".join(node["name"] for node in record["path"].nodes)
            for record in result
        ]

3. Community-Based Retrieval (Microsoft GraphRAG)

from graspologic.partition import hierarchical_leiden

def build_communities(graph: GraphStore) -> dict:
    """Detect communities and produce one summary per community.

    Args:
        graph: Store that must provide ``to_networkx()`` and ``get_entity()``.
            NOTE(review): the ``GraphStore`` class shown in this document
            defines neither method — confirm they exist where this is used.

    Returns:
        Mapping of community (cluster) id to the text returned by
        ``summarize_community`` for that community's member descriptions.
    """
    nx_graph = graph.to_networkx()

    # ``hierarchical_leiden`` returns a flat sequence of per-node records with
    # ``node`` and ``cluster`` attributes, not a mapping, so calling
    # ``.items()`` on it fails. Group node ids by cluster id first.
    # NOTE(review): assumes cluster ids are unique across hierarchy levels —
    # confirm against the graspologic documentation.
    members_by_community = {}
    for assignment in hierarchical_leiden(nx_graph, max_cluster_size=10):
        members_by_community.setdefault(assignment.cluster, []).append(assignment.node)

    community_summaries = {}
    for community_id, members in members_by_community.items():
        member_descriptions = [graph.get_entity(m)["description"] for m in members]
        community_summaries[community_id] = summarize_community(member_descriptions)

    return community_summaries

def community_retrieve(query: str, community_summaries: dict, vectorstore=None) -> list:
    """Search community summaries first, then drill down to their entities.

    Args:
        query: User question used for the similarity search.
        community_summaries: Not read by this function; kept so the signature
            stays compatible with existing callers.
        vectorstore: Object exposing ``similarity_search(text, k=..., filter=...)``.
            When omitted, the module-level name ``vectorstore`` is used, which
            is what the function implicitly relied on before.

    Returns:
        Whatever ``retrieve_by_entities`` returns for the members of the
        three most similar community summaries.

    Raises:
        NameError: If no vector store is passed and none is defined at module
            level.
    """
    store = vectorstore
    if store is None:
        # The parameter shadows the global of the same name, so the fallback
        # has to go through globals().
        try:
            store = globals()["vectorstore"]
        except KeyError:
            raise NameError(
                "community_retrieve needs a vector store: pass vectorstore=... "
                "or define a module-level 'vectorstore'"
            ) from None

    relevant = store.similarity_search(
        query,
        k=3,
        filter={"type": "community_summary"}
    )

    # Each hit is expected to list its member entities in its metadata.
    entities = []
    for community in relevant:
        entities.extend(community.metadata["members"])

    return retrieve_by_entities(entities)

LangChain + Neo4j Integration

from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain

# Connect to Neo4j.
# NOTE(review): the URL and credentials below are hard-coded example values —
# load real ones from configuration or the environment outside a demo.
graph = Neo4jGraph(
    url="bolt://localhost:7687",
    username="neo4j",
    password="password"
)

# Build a chain that turns a natural-language question into a Cypher query.
# return_intermediate_steps=True asks the chain to include its intermediate
# steps in the result alongside the answer.
# NOTE(review): recent langchain releases reportedly require an explicit
# opt-in flag (allow_dangerous_requests=True) for this chain — confirm against
# the installed version.
chain = GraphCypherQAChain.from_llm(
    llm=ChatOpenAI(model="gpt-4"),
    graph=graph,
    verbose=True,
    return_intermediate_steps=True
)

# Ask a question in natural language.
result = chain.invoke({
    "query": "Who are the engineers working on Project Atlas?"
})
# Example of the Cypher the chain is expected to generate: MATCH (p:Person)-[:WORKS_ON]->(proj:Project {name: 'Atlas'}) RETURN p

Hybrid Graph + Vector Pipeline

class GraphRAG:
    """Hybrid pipeline: vector search for chunks, graph expansion for relations."""

    def __init__(self, graph: "GraphStore", vectorstore, llm):
        """Store the collaborators; nothing is queried at construction time.

        Args:
            graph: Object exposing ``get_neighbors(entity, hops=...)`` that
                returns path objects with ``nodes`` and ``relationships``.
            vectorstore: Object exposing ``similarity_search(query, k=...)``.
            llm: Object exposing ``invoke(prompt)`` whose result has ``content``.
        """
        self.graph = graph
        self.vectorstore = vectorstore
        self.llm = llm

    def retrieve(self, query: str) -> dict:
        """Collect text chunks and relationship triples relevant to ``query``.

        Returns:
            A dict with ``"chunks"`` (the ten nearest vector hits) and
            ``"graph_context"`` (strings of the form
            ``"source -> relationship type -> target"``).
        """
        # 1. Vector search for the initial chunks.
        vector_results = self.vectorstore.similarity_search(query, k=10)

        # 2. Entities tagged on those chunks.
        entities = set()
        for doc in vector_results:
            entities.update(doc.metadata.get("entities", []))

        # 3. Expand at most five entities through the graph. Sorting makes the
        #    chosen five deterministic instead of depending on set order.
        graph_context = []
        for entity in sorted(entities, key=str)[:5]:
            for path in self.graph.get_neighbors(entity, hops=1):
                # get_neighbors returns paths, not dicts: walk each hop and
                # read the relationship kind from the edge's ``type`` property.
                nodes = path.nodes
                for source, rel, target in zip(nodes, path.relationships, nodes[1:]):
                    graph_context.append(
                        f"{source['name']} -> {rel['type']} -> {target['name']}"
                    )

        # 4. Hand both contexts back together.
        return {
            "chunks": vector_results,
            "graph_context": graph_context
        }

    def _format_chunks(self, chunks) -> str:
        """Join chunk texts with blank lines (``str(chunk)`` if no ``page_content``)."""
        return "\n\n".join(getattr(chunk, "page_content", str(chunk)) for chunk in chunks)

    def generate(self, query: str, context: dict) -> str:
        """Answer ``query`` with the LLM, grounded in ``context`` from ``retrieve``.

        Args:
            query: The user's question.
            context: Dict with ``"chunks"`` and ``"graph_context"`` keys.

        Returns:
            The ``content`` of the LLM's reply.
        """
        prompt = f"""Answer based on the context.

        Text chunks:
        {self._format_chunks(context['chunks'])}

        Entity relationships:
        {chr(10).join(context['graph_context'])}

        Question: {query}
        """
        return self.llm.invoke(prompt).content

Best Practices

Extract consistently - use same LLM/prompt for all documents
Normalize entities - "AWS", "Amazon Web Services" → same node
Limit graph depth - 2-3 hops usually sufficient
Cache traversals - graph queries can be expensive
Combine with vectors - graph alone misses semantic similarity
Version your schema - entity/relationship types will evolve

Weekly Installs

Repository

latestaiagents/…t-skills

GitHub Stars

First Seen

Feb 5, 2026

Security Audits

Gen Agent Trust Hub: Pass · Socket: Pass · Snyk: Pass

Installed on

opencode: 68

gemini-cli: 68

codex: 68

github-copilot: 68

cursor: 65

kimi-cli: 64

AI 代码实施计划编写技能 | 自动化开发任务分解与 TDD 流程规划工具

50,900 周安装