Knowledge Graph Builder by daffy0208/ai-dev-standards
npx skills add https://github.com/daffy0208/ai-dev-standards --skill 'Knowledge Graph Builder'
通过关系知识构建结构化知识图谱,以提升 AI 系统性能。
知识图谱使隐式关系显式化,使 AI 系统能够推理连接、验证事实并避免幻觉。
目标:为您的领域定义实体、关系和属性
实体类型(节点):
关系类型(边):
属性(特性):
广告位招租
在这里展示您的产品或服务
触达数万 AI 开发者,精准高效
本体示例:
# RDF/Turtle format
@prefix : <http://example.org/ontology#> .
:Person a owl:Class ;
rdfs:label "Person" .
:Organization a owl:Class ;
rdfs:label "Organization" .
:worksFor a owl:ObjectProperty ;
rdfs:domain :Person ;
rdfs:range :Organization ;
rdfs:label "works for" .
验证:
决策矩阵:
Neo4j(对大多数情况推荐):
Amazon Neptune:
ArangoDB:
TigerGraph:
技术栈:
graph_database: 'Neo4j Community' # or Enterprise for production
vector_integration: 'Pinecone' # For hybrid search
embeddings: 'text-embedding-3-large' # OpenAI
etl: 'Apache Airflow' # For data pipelines
Neo4j 模式设置:
// Create constraints for uniqueness
CREATE CONSTRAINT person_id IF NOT EXISTS
FOR (p:Person) REQUIRE p.id IS UNIQUE;
CREATE CONSTRAINT org_name IF NOT EXISTS
FOR (o:Organization) REQUIRE o.name IS UNIQUE;
// Create indexes for performance
CREATE INDEX entity_search IF NOT EXISTS
FOR (e:Entity) ON (e.name, e.type);
CREATE INDEX relationship_type IF NOT EXISTS
FOR ()-[r:RELATED_TO]-() ON (r.type, r.confidence);
目标:从数据源中提取实体和关系
数据源:
实体提取管道:
class EntityExtractionPipeline:
    """Three-stage entity pipeline: NER extraction -> linking -> deduplication."""

    def __init__(self):
        # NOTE(review): load_ner_model / EntityLinker / EntityDeduplicator are
        # project collaborators defined elsewhere (spaCy or Hugging Face backed).
        self.ner_model = load_ner_model()  # spaCy, Hugging Face
        self.entity_linker = EntityLinker()
        self.deduplicator = EntityDeduplicator()

    def process_text(self, text: str) -> List[Entity]:
        """Extract named entities from *text*, resolve them against known
        entities, and return the deduplicated result."""
        # Stage 1: named-entity recognition.
        raw_entities = self.ner_model.extract(text)
        # Stage 2: entity resolution against existing entities.
        linked = self.entity_linker.link(raw_entities)
        # Stage 3: merge duplicates and resolve conflicts.
        return self.deduplicator.resolve(linked)
关系提取:
class RelationshipExtractor:
    """Extract ontology-valid relationships between known entities in text."""

    def extract_relationships(self, entities: List[Entity],
                              text: str) -> List[Relationship]:
        """Return relationships found in *text*, validated against the ontology.

        Uses dependency parsing (or an LLM) sentence by sentence, then keeps
        only candidates the ontology allows.
        """
        doc = self.nlp(text)
        candidates = []
        for sentence in doc.sents:
            candidates.extend(self.extract_from_sentence(sentence, entities))
        # Drop candidates that violate the ontology's domain/range rules.
        return self.validate_relationships(candidates)
基于 LLM 的提取(用于复杂关系):
def extract_with_llm(text: str) -> List[Relationship]:
    """LLM-based extraction for relationships too complex for parsing rules.

    Prompts the model for (subject, relation, object, confidence) tuples and
    parses its reply into Relationship objects.
    """
    prompt = f"""
Extract entities and relationships from this text:
{text}
Format: (Entity1, Relationship, Entity2, Confidence)
Only extract factual relationships.
"""
    # NOTE(review): `llm` and `parse_llm_response` are module-level
    # collaborators defined elsewhere in the project.
    raw_response = llm.generate(prompt)
    return parse_llm_response(raw_response)
验证:
目标:将结构化图与语义向量搜索相结合
架构:
class HybridKnowledgeSystem:
    """Pair a graph store (structure) with a vector store (semantics)."""

    def __init__(self):
        self.graph_db = Neo4jConnection()
        self.vector_db = PineconeClient()
        self.embedding_model = OpenAIEmbeddings()

    def store_entity(self, entity: Entity):
        """Persist *entity* twice: a node in the graph, an embedding in the
        vector database (keyed by the same id)."""
        self.graph_db.create_node(entity)
        vector = self.embedding_model.embed(entity.description)
        self.vector_db.upsert(
            id=entity.id,
            values=vector,
            metadata=entity.metadata,
        )

    def hybrid_search(self, query: str, top_k: int = 10) -> SearchResults:
        """Vector recall, then graph expansion, then merged ranking.

        Over-fetches 100 vector hits so the 2-hop graph expansion has a wide
        seed set before the merged ranking is trimmed to *top_k*.
        """
        # 1. Semantic recall from the vector store.
        query_vector = self.embedding_model.embed(query)
        vector_hits = self.vector_db.query(
            vector=query_vector,
            top_k=100,
        )
        # 2. Structural expansion: 2-hop subgraph around the vector hits.
        seed_ids = [hit.id for hit in vector_hits.matches]
        graph_hits = self.graph_db.get_subgraph(seed_ids, max_hops=2)
        # 3. Merge both result sets and keep the best top_k.
        ranked = self.merge_results(vector_hits, graph_hits)
        return ranked[:top_k]
混合方法的优势:
常见查询模式:
1. 查找实体:
MATCH (e:Entity {id: $entity_id})
RETURN e
2. 查找关系:
MATCH (source:Entity {id: $entity_id})-[r]-(target)
RETURN source, r, target
LIMIT 20
3. 实体间路径:
MATCH path = shortestPath(
(source:Person {id: $source_id})-[*..5]-(target:Person {id: $target_id})
)
RETURN path
4. 多跳遍历:
MATCH (p:Person {name: $name})-[:WORKS_FOR]->(o:Organization)-[:LOCATED_IN]->(l:Location)
RETURN p.name, o.name, l.city
5. 推荐查询:
// Find people similar to this person based on shared organizations
MATCH (p1:Person {id: $person_id})-[:WORKS_FOR]->(o:Organization)<-[:WORKS_FOR]-(p2:Person)
WHERE p1 <> p2
RETURN p2, COUNT(o) AS shared_orgs
ORDER BY shared_orgs DESC
LIMIT 10
知识图谱 API:
class KnowledgeGraphAPI:
    """Thin query layer over a Neo4j-style driver (`graph.run(query, **params)`).

    Fuzzy lookups require the APOC plugin (apoc.text.levenshtein,
    apoc.path.subgraphAll).
    """

    def __init__(self, graph_db):
        self.graph = graph_db

    def find_entity(self, entity_name: str) -> Entity:
        """Find entity by name with fuzzy matching"""
        # Substring match first, then rank by edit distance (APOC).
        query = """
MATCH (e:Entity)
WHERE e.name CONTAINS $name
RETURN e
ORDER BY apoc.text.levenshtein(e.name, $name)
LIMIT 1
"""
        return self.graph.run(query, name=entity_name).single()

    def find_relationships(self, entity_id: str,
                           relationship_type: str = None,
                           max_hops: int = 2) -> List[Relationship]:
        """Find relationships within specified hops"""
        # Variable-length pattern bounds cannot be Cypher parameters, so
        # max_hops must be interpolated into the query text. Coerce to int
        # first so a malicious/malformed value cannot inject Cypher.
        max_hops = int(max_hops)
        query = f"""
MATCH (source:Entity {{id: $entity_id}})
MATCH path = (source)-[r*1..{max_hops}]-(target)
RETURN path, relationships(path) AS rels
LIMIT 100
"""
        # NOTE(review): `relationship_type` is accepted but not yet applied as
        # a filter — TODO wire it into the relationship pattern.
        return self.graph.run(query, entity_id=entity_id).data()

    def get_subgraph(self, entity_ids: List[str],
                     max_hops: int = 2) -> Subgraph:
        """Get connected subgraph for multiple entities"""
        max_hops = int(max_hops)  # same injection guard as find_relationships
        query = f"""
MATCH (e:Entity)
WHERE e.id IN $entity_ids
CALL apoc.path.subgraphAll(e, {{maxLevel: {max_hops}}})
YIELD nodes, relationships
RETURN nodes, relationships
"""
        return self.graph.run(query, entity_ids=entity_ids).data()
目标:使用知识图谱来锚定 LLM 响应并检测幻觉
知识图谱 RAG:
class KnowledgeGraphRAG:
    """Ground LLM answers in a knowledge-graph subgraph (KG-RAG)."""

    def __init__(self, kg_api, llm_client):
        self.kg = kg_api
        self.llm = llm_client

    def retrieve_context(self, query: str) -> str:
        """Build an LLM-ready context string from the 2-hop subgraph around
        the entities mentioned in *query*."""
        query_entities = self.extract_entities_from_query(query)
        subgraph = self.kg.get_subgraph(
            [entity.id for entity in query_entities],
            max_hops=2,
        )
        return self.format_subgraph_for_llm(subgraph)

    def generate_with_grounding(self, query: str) -> GroundedResponse:
        """Answer *query* using only graph-derived context, returning the
        response together with its sources and a confidence score."""
        context = self.retrieve_context(query)
        prompt = f"""
Context from knowledge graph:
{context}
User query: {query}
Answer based only on the provided context. Include source entities.
"""
        answer = self.llm.generate(prompt)
        return GroundedResponse(
            response=answer,
            sources=self.extract_sources(context),
            confidence=self.calculate_confidence(answer, context),
        )
幻觉检测:
class HallucinationDetector:
    """Verify LLM claims against the knowledge graph."""

    def __init__(self, knowledge_graph):
        self.kg = knowledge_graph

    def verify_claim(self, claim: str) -> VerificationResult:
        """Classify *claim* as supported or unsupported (and, when
        unsupported, whether the graph explicitly contradicts it)."""
        # Parse the free-text claim into a (subject, predicate, object) triple.
        triple = self.parse_claim(claim)
        supporting = self.kg.find_evidence(
            triple.subject,
            triple.predicate,
            triple.object,
        )
        if supporting:
            return VerificationResult(
                is_supported=True,
                evidence=supporting,
                confidence=supporting.confidence,
            )
        # No support found — check for explicit counter-evidence before
        # reporting the claim as merely unsupported.
        counter = self.kg.find_contradiction(triple)
        return VerificationResult(
            is_supported=False,
            is_contradicted=bool(counter),
            contradiction=counter,
        )
在摄取数据之前定义您的模式。后期更改本体成本高昂。
积极进行实体去重。"Apple Inc"、"Apple"、"Apple Computer" → 同一实体。
每个关系都应具有置信度分数(0.0-1.0)和来源。
不要试图一次性建模整个领域。从核心实体开始并逐步扩展。
结合图遍历(结构化)和向量搜索(语义)以获得最佳结果。
1. 问答:
2. 推荐:
3. 欺诈检测:
4. 知识发现:
5. 语义搜索:
对于 MVP(<10K 实体):
对于生产环境(10K-1M 实体):
对于企业级(1M+ 实体):
相关技能:
rag-implementer - 用于混合 KG+RAG 系统
multi-agent-architect - 用于知识图谱驱动的智能体
api-designer - 用于 KG API 设计
相关模式:
META/DECISION-FRAMEWORK.md - 图数据库选择
STANDARDS/architecture-patterns/knowledge-graph-pattern.md - KG 架构(创建时)
相关操作手册:
PLAYBOOKS/deploy-neo4j.md - Neo4j 部署(创建时)
PLAYBOOKS/build-kg-rag-system.md - KG-RAG 集成(创建时)
每周安装次数
0
仓库
GitHub 星标数
18
首次出现
Jan 1, 1970
安全审计
Build structured knowledge graphs for enhanced AI system performance through relational knowledge.
Knowledge graphs make implicit relationships explicit, enabling AI systems to reason about connections, verify facts, and avoid hallucinations.
Goal : Define entities, relationships, and properties for your domain
Entity Types (Nodes):
Relationship Types (Edges):
Properties (Attributes):
Example Ontology :
# RDF/Turtle format
@prefix : <http://example.org/ontology#> .
:Person a owl:Class ;
rdfs:label "Person" .
:Organization a owl:Class ;
rdfs:label "Organization" .
:worksFor a owl:ObjectProperty ;
rdfs:domain :Person ;
rdfs:range :Organization ;
rdfs:label "works for" .
Validation :
Decision Matrix :
Neo4j (Recommended for most):
Amazon Neptune :
ArangoDB :
TigerGraph :
Technology Stack :
graph_database: 'Neo4j Community' # or Enterprise for production
vector_integration: 'Pinecone' # For hybrid search
embeddings: 'text-embedding-3-large' # OpenAI
etl: 'Apache Airflow' # For data pipelines
Neo4j Schema Setup :
// Create constraints for uniqueness
CREATE CONSTRAINT person_id IF NOT EXISTS
FOR (p:Person) REQUIRE p.id IS UNIQUE;
CREATE CONSTRAINT org_name IF NOT EXISTS
FOR (o:Organization) REQUIRE o.name IS UNIQUE;
// Create indexes for performance
CREATE INDEX entity_search IF NOT EXISTS
FOR (e:Entity) ON (e.name, e.type);
CREATE INDEX relationship_type IF NOT EXISTS
FOR ()-[r:RELATED_TO]-() ON (r.type, r.confidence);
Goal : Extract entities and relationships from data sources
Data Sources :
Entity Extraction Pipeline :
class EntityExtractionPipeline:
    """Three-stage entity pipeline: NER extraction -> linking -> deduplication."""

    def __init__(self):
        # NOTE(review): load_ner_model / EntityLinker / EntityDeduplicator are
        # project collaborators defined elsewhere (spaCy or Hugging Face backed).
        self.ner_model = load_ner_model()  # spaCy, Hugging Face
        self.entity_linker = EntityLinker()
        self.deduplicator = EntityDeduplicator()

    def process_text(self, text: str) -> List[Entity]:
        """Extract named entities from *text*, resolve them against known
        entities, and return the deduplicated result."""
        # Stage 1: named-entity recognition.
        raw_entities = self.ner_model.extract(text)
        # Stage 2: entity resolution against existing entities.
        linked = self.entity_linker.link(raw_entities)
        # Stage 3: merge duplicates and resolve conflicts.
        return self.deduplicator.resolve(linked)
Relationship Extraction :
class RelationshipExtractor:
    """Extract ontology-valid relationships between known entities in text."""

    def extract_relationships(self, entities: List[Entity],
                              text: str) -> List[Relationship]:
        """Return relationships found in *text*, validated against the ontology.

        Uses dependency parsing (or an LLM) sentence by sentence, then keeps
        only candidates the ontology allows.
        """
        doc = self.nlp(text)
        candidates = []
        for sentence in doc.sents:
            candidates.extend(self.extract_from_sentence(sentence, entities))
        # Drop candidates that violate the ontology's domain/range rules.
        return self.validate_relationships(candidates)
LLM-Based Extraction (for complex relationships):
def extract_with_llm(text: str) -> List[Relationship]:
    """LLM-based extraction for relationships too complex for parsing rules.

    Prompts the model for (subject, relation, object, confidence) tuples and
    parses its reply into Relationship objects.
    """
    prompt = f"""
Extract entities and relationships from this text:
{text}
Format: (Entity1, Relationship, Entity2, Confidence)
Only extract factual relationships.
"""
    # NOTE(review): `llm` and `parse_llm_response` are module-level
    # collaborators defined elsewhere in the project.
    raw_response = llm.generate(prompt)
    return parse_llm_response(raw_response)
Validation :
Goal : Combine structured graph with semantic vector search
Architecture :
class HybridKnowledgeSystem:
    """Pair a graph store (structure) with a vector store (semantics)."""

    def __init__(self):
        self.graph_db = Neo4jConnection()
        self.vector_db = PineconeClient()
        self.embedding_model = OpenAIEmbeddings()

    def store_entity(self, entity: Entity):
        """Persist *entity* twice: a node in the graph, an embedding in the
        vector database (keyed by the same id)."""
        self.graph_db.create_node(entity)
        vector = self.embedding_model.embed(entity.description)
        self.vector_db.upsert(
            id=entity.id,
            values=vector,
            metadata=entity.metadata,
        )

    def hybrid_search(self, query: str, top_k: int = 10) -> SearchResults:
        """Vector recall, then graph expansion, then merged ranking.

        Over-fetches 100 vector hits so the 2-hop graph expansion has a wide
        seed set before the merged ranking is trimmed to *top_k*.
        """
        # 1. Semantic recall from the vector store.
        query_vector = self.embedding_model.embed(query)
        vector_hits = self.vector_db.query(
            vector=query_vector,
            top_k=100,
        )
        # 2. Structural expansion: 2-hop subgraph around the vector hits.
        seed_ids = [hit.id for hit in vector_hits.matches]
        graph_hits = self.graph_db.get_subgraph(seed_ids, max_hops=2)
        # 3. Merge both result sets and keep the best top_k.
        ranked = self.merge_results(vector_hits, graph_hits)
        return ranked[:top_k]
Benefits of Hybrid Approach :
Common Query Patterns :
1. Find Entity :
MATCH (e:Entity {id: $entity_id})
RETURN e
2. Find Relationships :
MATCH (source:Entity {id: $entity_id})-[r]-(target)
RETURN source, r, target
LIMIT 20
3. Path Between Entities :
MATCH path = shortestPath(
(source:Person {id: $source_id})-[*..5]-(target:Person {id: $target_id})
)
RETURN path
4. Multi-Hop Traversal :
MATCH (p:Person {name: $name})-[:WORKS_FOR]->(o:Organization)-[:LOCATED_IN]->(l:Location)
RETURN p.name, o.name, l.city
5. Recommendation Query :
// Find people similar to this person based on shared organizations
MATCH (p1:Person {id: $person_id})-[:WORKS_FOR]->(o:Organization)<-[:WORKS_FOR]-(p2:Person)
WHERE p1 <> p2
RETURN p2, COUNT(o) AS shared_orgs
ORDER BY shared_orgs DESC
LIMIT 10
Knowledge Graph API :
class KnowledgeGraphAPI:
    """Thin query layer over a Neo4j-style driver (`graph.run(query, **params)`).

    Fuzzy lookups require the APOC plugin (apoc.text.levenshtein,
    apoc.path.subgraphAll).
    """

    def __init__(self, graph_db):
        self.graph = graph_db

    def find_entity(self, entity_name: str) -> Entity:
        """Find entity by name with fuzzy matching"""
        # Substring match first, then rank by edit distance (APOC).
        query = """
MATCH (e:Entity)
WHERE e.name CONTAINS $name
RETURN e
ORDER BY apoc.text.levenshtein(e.name, $name)
LIMIT 1
"""
        return self.graph.run(query, name=entity_name).single()

    def find_relationships(self, entity_id: str,
                           relationship_type: str = None,
                           max_hops: int = 2) -> List[Relationship]:
        """Find relationships within specified hops"""
        # Variable-length pattern bounds cannot be Cypher parameters, so
        # max_hops must be interpolated into the query text. Coerce to int
        # first so a malicious/malformed value cannot inject Cypher.
        max_hops = int(max_hops)
        query = f"""
MATCH (source:Entity {{id: $entity_id}})
MATCH path = (source)-[r*1..{max_hops}]-(target)
RETURN path, relationships(path) AS rels
LIMIT 100
"""
        # NOTE(review): `relationship_type` is accepted but not yet applied as
        # a filter — TODO wire it into the relationship pattern.
        return self.graph.run(query, entity_id=entity_id).data()

    def get_subgraph(self, entity_ids: List[str],
                     max_hops: int = 2) -> Subgraph:
        """Get connected subgraph for multiple entities"""
        max_hops = int(max_hops)  # same injection guard as find_relationships
        query = f"""
MATCH (e:Entity)
WHERE e.id IN $entity_ids
CALL apoc.path.subgraphAll(e, {{maxLevel: {max_hops}}})
YIELD nodes, relationships
RETURN nodes, relationships
"""
        return self.graph.run(query, entity_ids=entity_ids).data()
Goal : Use knowledge graph to ground LLM responses and detect hallucinations
Knowledge Graph RAG :
class KnowledgeGraphRAG:
    """Ground LLM answers in a knowledge-graph subgraph (KG-RAG)."""

    def __init__(self, kg_api, llm_client):
        self.kg = kg_api
        self.llm = llm_client

    def retrieve_context(self, query: str) -> str:
        """Build an LLM-ready context string from the 2-hop subgraph around
        the entities mentioned in *query*."""
        query_entities = self.extract_entities_from_query(query)
        subgraph = self.kg.get_subgraph(
            [entity.id for entity in query_entities],
            max_hops=2,
        )
        return self.format_subgraph_for_llm(subgraph)

    def generate_with_grounding(self, query: str) -> GroundedResponse:
        """Answer *query* using only graph-derived context, returning the
        response together with its sources and a confidence score."""
        context = self.retrieve_context(query)
        prompt = f"""
Context from knowledge graph:
{context}
User query: {query}
Answer based only on the provided context. Include source entities.
"""
        answer = self.llm.generate(prompt)
        return GroundedResponse(
            response=answer,
            sources=self.extract_sources(context),
            confidence=self.calculate_confidence(answer, context),
        )
Hallucination Detection :
class HallucinationDetector:
    """Verify LLM claims against the knowledge graph."""

    def __init__(self, knowledge_graph):
        self.kg = knowledge_graph

    def verify_claim(self, claim: str) -> VerificationResult:
        """Classify *claim* as supported or unsupported (and, when
        unsupported, whether the graph explicitly contradicts it)."""
        # Parse the free-text claim into a (subject, predicate, object) triple.
        triple = self.parse_claim(claim)
        supporting = self.kg.find_evidence(
            triple.subject,
            triple.predicate,
            triple.object,
        )
        if supporting:
            return VerificationResult(
                is_supported=True,
                evidence=supporting,
                confidence=supporting.confidence,
            )
        # No support found — check for explicit counter-evidence before
        # reporting the claim as merely unsupported.
        counter = self.kg.find_contradiction(triple)
        return VerificationResult(
            is_supported=False,
            is_contradicted=bool(counter),
            contradiction=counter,
        )
Define your schema before ingesting data. Changing ontology later is expensive.
Deduplicate entities aggressively. "Apple Inc", "Apple", "Apple Computer" → same entity.
Every relationship should have a confidence score (0.0-1.0) and source.
Don't try to model entire domain at once. Start with core entities and expand.
Combine graph traversal (structured) with vector search (semantic) for best results.
1. Question Answering :
2. Recommendation :
3. Fraud Detection :
4. Knowledge Discovery :
5. Semantic Search :
For MVPs ( <10K entities):
For Production (10K-1M entities) :
For Enterprise (1M+ entities) :
Related Skills :
rag-implementer - For hybrid KG+RAG systems
multi-agent-architect - For knowledge-graph-powered agents
api-designer - For KG API design
Related Patterns:
META/DECISION-FRAMEWORK.md - Graph DB selection
STANDARDS/architecture-patterns/knowledge-graph-pattern.md - KG architectures (when created)
Related Playbooks:
PLAYBOOKS/deploy-neo4j.md - Neo4j deployment (when created)
PLAYBOOKS/build-kg-rag-system.md - KG-RAG integration (when created)
Weekly Installs
0
Repository
GitHub Stars
18
First Seen
Jan 1, 1970
Security Audits
超能力技能使用指南:AI助手技能调用优先级与工作流程详解
45,100 周安装