Managing Knowledge Bases
Learn how to organize, maintain, and query DreamLog knowledge bases effectively.
Knowledge Base Structure
Core Components
A DreamLog knowledge base consists of:
- Facts - Ground truths about the world
- Rules - Conditional relationships and inference patterns
- Indexes - Efficient lookups by functor
- LLM Hook - Optional AI-powered knowledge generation
from dreamlog.pythonic import dreamlog
kb = dreamlog()
print(kb.stats)
# {'num_facts': 0, 'num_rules': 0, 'functors': [], 'total_items': 0}
Internal Organization¶
# Facts are indexed by functor for efficient retrieval
kb.fact("parent", "john", "mary")
kb.fact("parent", "mary", "alice")
kb.fact("age", "john", 45)
# Internally organized as:
# fact_index = {
# "parent": [Fact(parent john mary), Fact(parent mary alice)],
# "age": [Fact(age john 45)]
# }
Adding Knowledge¶
Adding Facts¶
# Single fact
kb.fact("student", "alice", "cs")
# Multiple facts at once
kb.facts(
("student", "bob", "math"),
("student", "charlie", "physics"),
("professor", "smith", "cs")
)
# From S-expressions
kb.parse("(enrolled alice cs101)")
kb.parse("(grade alice cs101 95)")
Adding Rules¶
# Using fluent API
kb.rule("grandparent", ["X", "Z"]) \
.when("parent", ["X", "Y"]) \
.and_("parent", ["Y", "Z"]) \
.build()
# From S-expressions
kb.parse("""
(ancestor X Y) :- (parent X Y)
(ancestor X Z) :- (parent X Y), (ancestor Y Z)
""")
# Multiple rules
rules = """
(sibling X Y) :- (parent Z X), (parent Z Y), (different X Y)
(uncle X Y) :- (sibling X Z), (parent Z Y), (male X)
(aunt X Y) :- (sibling X Z), (parent Z Y), (female X)
"""
kb.parse(rules)
Batch Loading¶
# From file
kb.load("knowledge_base.dreamlog")
# From Python data structures
facts_data = [
["student", "alice", "cs"],
["student", "bob", "math"],
["grade", "alice", "cs101", 95]
]
# Each entry is [functor, arg1, arg2, ...]; unpack it into kb.fact()
for fact in facts_data:
    kb.fact(*fact)
# From JSON
import json
with open("facts.json") as f:
    data = json.load(f)
# The file is expected to hold {"facts": [[functor, arg1, ...], ...]}
for item in data["facts"]:
    kb.fact(*item)
Querying Knowledge¶
Basic Queries¶
# Ground query (checking existence)
exists = kb.query_exists("parent", "john", "mary")
print(f"John is Mary's parent: {exists}")
# Variable queries: each result maps variable names to their bindings
for result in kb.query("parent", "john", "X"):
    print(f"John is parent of {result['X']}")
# Multiple variables
for result in kb.query("parent", "X", "Y"):
    print(f"{result['X']} is parent of {result['Y']}")
Advanced Queries¶
# Complex queries with multiple goals
results = kb.query_complex([
    ("parent", "X", "Y"),
    ("parent", "Y", "Z"),
    ("age", "X", "Age"),
    ("greater", "Age", 40),
])
for r in results:
    print(f"{r['X']} (age {r['Age']}) is grandparent of {r['Z']}")
# Get first N results: stop iterating once five have been printed
for i, result in enumerate(kb.query("student", "X", "_")):
    if i >= 5:
        break
    print(f"Student {i+1}: {result['X']}")
# Collect all results
all_students = list(kb.query("student", "X", "_"))
print(f"Total students: {len(all_students)}")
Query with Explanations¶
# Enable tracing for explanations
kb.set_trace(True)
for result in kb.query("grandparent", "john", "X"):
    print(f"Result: {result}")
    # Read the trace inside the loop so it belongs to the result just printed
    print(f"Explanation: {kb.get_last_trace()}")
kb.set_trace(False)
Knowledge Organization¶
Namespacing with Functors¶
# Use prefixes for organization
kb.fact("person:name", "alice", "Alice Smith")
kb.fact("person:age", "alice", 25)
kb.fact("person:email", "alice", "alice@example.com")
kb.fact("course:name", "cs101", "Intro to Programming")
kb.fact("course:credits", "cs101", 3)
kb.fact("course:instructor", "cs101", "smith")
Hierarchical Organization¶
# Department -> Course -> Section structure
kb.fact("department", "cs", "Computer Science")
kb.fact("course", "cs101", "cs", "Intro Programming")
kb.fact("section", "cs101-01", "cs101", "morning")
kb.fact("section", "cs101-02", "cs101", "afternoon")
# Query hierarchically
kb.parse("""
(courses_in_dept Dept Course) :-
(course Course Dept _)
(sections_of_course Course Section) :-
(section Section Course _)
""")
Temporal Facts¶
# Add timestamps to facts
from datetime import datetime
def add_temporal_fact(kb, predicate, *args, timestamp=None):
    """Record a fact together with a timestamped copy of it.

    Asserts ``(<predicate>_at *args ts)`` and then ``(<predicate> *args)``.

    Args:
        kb: Knowledge base exposing ``fact(functor, *args)``.
        predicate: Functor of the fact to record.
        *args: Arguments of the fact.
        timestamp: Value stored as the last argument of the ``_at`` fact.
            When omitted, the current local time in ISO 8601 format is used.
    """
    # Compare against None explicitly so a caller-supplied falsy timestamp
    # (e.g. epoch 0) is stored instead of being replaced by "now".
    ts = datetime.now().isoformat() if timestamp is None else timestamp
    kb.fact(f"{predicate}_at", *args, ts)
    kb.fact(predicate, *args)  # Current fact
# Usage
add_temporal_fact(kb, "enrolled", "alice", "cs101")
add_temporal_fact(kb, "grade", "alice", "cs101", 95,
timestamp="2024-05-15")
# Query historical data
for r in kb.query("enrolled_at", "alice", "Course", "Time"):
    print(f"Enrolled in {r['Course']} at {r['Time']}")
Modifying Knowledge¶
Updating Facts¶
# DreamLog doesn't have direct update, so we retract and assert
def update_fact(kb, old_fact, new_fact):
    """Replace one fact with another by retracting and re-asserting.

    Args:
        kb: Knowledge base exposing ``retract(functor, *args)`` and
            ``fact(functor, *args)``.
        old_fact: Sequence ``(functor, arg1, ...)`` to remove.
        new_fact: Sequence ``(functor, arg1, ...)`` to add.

    Note:
        The two steps are not atomic: if adding ``new_fact`` raises, the
        old fact has already been removed.
    """
    # Remove old fact
    kb.retract(*old_fact)
    # Add new fact
    kb.fact(*new_fact)
# Example
update_fact(kb,
("age", "alice", 25),
("age", "alice", 26))
Retracting Knowledge¶
# Remove specific fact
kb.retract("enrolled", "alice", "cs101")
# Remove all facts matching pattern
kb.retract_all("enrolled", "alice", "_")
# Clear all facts for a functor
kb.clear_functor("temp_data")
Rule Management¶
# Add versioned rules
kb.parse("""
(discount_v1 Student Amount) :-
(student Student _),
(equals Amount 10)
""")
# Replace with new version
kb.remove_rules("discount_v1")
kb.parse("""
(discount_v2 Student Amount) :-
(student Student _),
(honors Student),
(equals Amount 20)
""")
kb.parse("""
(discount_v2 Student Amount) :-
(student Student _),
(not (honors Student)),
(equals Amount 10)
""")
Persistence¶
Saving Knowledge¶
# Save to file
kb.save("my_knowledge.dreamlog")
# Save with metadata
metadata = {
"version": "1.0",
"created": datetime.now().isoformat(),
"domain": "academic"
}
kb.save_with_metadata("kb_with_meta.dreamlog", metadata)
# Export as S-expressions
with open("kb.sexp", "w") as f:
    f.write(kb.to_sexp())
# Export as JSON
import json
with open("kb.json", "w") as f:
    json.dump(kb.to_json(), f, indent=2)
Loading Knowledge¶
# Load from file
kb = dreamlog()
kb.load("my_knowledge.dreamlog")
# Merge multiple knowledge bases
kb1 = dreamlog()
kb1.load("domain1.dreamlog")
kb2 = dreamlog()
kb2.load("domain2.dreamlog")
# Merge kb2 into kb1
kb1.merge(kb2)
# Load with validation
def validate_and_load(kb, filepath):
    """Load a knowledge base file into ``kb`` only if it passes validation.

    The file is first loaded into a scratch knowledge base, so ``kb`` is
    left untouched when validation fails; it is merged in only afterwards.

    Args:
        kb: Target knowledge base; must expose ``merge(other)``.
        filepath: Path of the knowledge base file to load.

    Returns:
        True once the file has been validated and merged.

    Raises:
        ValueError: If the file contains no facts, or if one of the
            required functors (``student``, ``course``, ``enrolled``)
            is absent.
    """
    temp_kb = dreamlog()
    temp_kb.load(filepath)
    stats = temp_kb.stats
    # Validate
    if stats['num_facts'] == 0:
        raise ValueError("Empty knowledge base")
    # Check for required functors
    required = ["student", "course", "enrolled"]
    functors = stats['functors']
    for req in required:
        if req not in functors:
            raise ValueError(f"Missing required functor: {req}")
    kb.merge(temp_kb)
    return True
Knowledge Base Patterns¶
Repository Pattern¶
class StudentRepository:
    """Repository-style facade that stores students as DreamLog facts.

    Each student is represented by three facts: ``(student id)``,
    ``(student_name id name)`` and ``(student_major id major)``.
    """

    def __init__(self, kb):
        """Wrap ``kb``, which must expose ``fact()`` and ``query()``."""
        self.kb = kb

    def add_student(self, id, name, major):
        """Assert the three facts describing a student and return ``id``."""
        self.kb.fact("student", id)
        self.kb.fact("student_name", id, name)
        self.kb.fact("student_major", id, major)
        return id

    def get_student(self, id):
        """Return ``{"id", "name", "major"}`` for ``id``, or None.

        None is returned when no ``student_name`` fact exists for ``id``.
        If several name or major facts match, the last one yielded wins.
        """
        student = {"id": id}
        for r in self.kb.query("student_name", id, "Name"):
            student["name"] = r["Name"]
        for r in self.kb.query("student_major", id, "Major"):
            student["major"] = r["Major"]
        return student if "name" in student else None

    def find_by_major(self, major):
        """Return the student records of everyone enrolled in ``major``.

        Ids that have a major fact but no name fact are skipped, so the
        returned list never contains None.
        """
        students = []
        for r in self.kb.query("student_major", "Id", major):
            student = self.get_student(r["Id"])
            if student is not None:
                students.append(student)
        return students
# Usage
repo = StudentRepository(kb)
repo.add_student("s001", "Alice Smith", "CS")
repo.add_student("s002", "Bob Jones", "Math")
cs_students = repo.find_by_major("CS")
Domain Separation¶
class DomainKB:
    """Registry of independent knowledge bases, one per named domain."""

    def __init__(self):
        # Maps domain name -> knowledge base instance
        self.domains = {}

    def get_domain(self, name):
        """Return the knowledge base for ``name``, creating it on first use."""
        if name not in self.domains:
            self.domains[name] = dreamlog()
        return self.domains[name]

    def query_across_domains(self, *domains, query):
        """Run the same query against several domains and pool the results.

        Args:
            *domains: Names of the domains to query, in order. Names that
                have never been created via ``get_domain`` are skipped.
            query: Keyword-only sequence ``(functor, arg1, ...)`` that is
                unpacked into each domain's ``query()`` call.

        Returns:
            A list with the results of every queried domain, concatenated
            in the order the domains were given.
        """
        results = []
        for domain in domains:
            if domain in self.domains:
                results.extend(self.domains[domain].query(*query))
        return results
# Usage
dkb = DomainKB()
# Academic domain
academic = dkb.get_domain("academic")
academic.fact("student", "alice", "cs")
academic.fact("grade", "alice", "A")
# Financial domain
financial = dkb.get_domain("financial")
financial.fact("tuition_paid", "alice", True)
financial.fact("balance", "alice", 0)
# Cross-domain query
eligible = []
for r in dkb.query_across_domains("academic", "financial",
                                  query=("student", "X", "_")):
    student = r["X"]
    # Check both domains
    if academic.query_exists("grade", student, "A") and \
            financial.query_exists("tuition_paid", student, True):
        eligible.append(student)
Caching Pattern¶
class CachedKB:
    """Wrapper that memoizes query results of an underlying knowledge base.

    Cache entries are keyed by ``(functor, repr(args))`` so they can be
    invalidated per functor without affecting other functors.

    NOTE(review): adding a fact only invalidates queries on that same
    functor. Cached queries on predicates derived from it by rules are
    not invalidated; call ``invalidate()`` with no argument in that case.
    """

    def __init__(self, kb):
        self.kb = kb
        # Maps (functor, repr(args)) -> list of query results
        self.cache = {}

    def query_cached(self, *args):
        """Return the results of ``kb.query(*args)``, computing them once.

        The cached list itself is returned on every call.
        """
        functor = args[0] if args else None
        # repr() keeps the key hashable even if an argument is not
        key = (functor, repr(args))
        if key not in self.cache:
            self.cache[key] = list(self.kb.query(*args))
        return self.cache[key]

    def invalidate(self, functor=None):
        """Drop cached results.

        Args:
            functor: When given, only queries whose functor equals it are
                dropped; when None, the whole cache is cleared.
        """
        if functor is None:
            self.cache.clear()
            return
        # Compare the stored functor exactly: a substring test on the key
        # would also evict unrelated queries (e.g. "page" when adding "age").
        self.cache = {k: v for k, v in self.cache.items()
                      if k[0] != functor}

    def fact(self, *args):
        """Add a fact to the wrapped knowledge base and evict stale entries."""
        self.kb.fact(*args)
        self.invalidate(args[0])  # Invalidate related cache
# Usage
cached_kb = CachedKB(kb)
results1 = cached_kb.query_cached("expensive_query", "X", "Y")
results2 = cached_kb.query_cached("expensive_query", "X", "Y") # From cache
Performance Optimization¶
Indexing Strategies¶
# Create custom indexes for frequent queries
class IndexedKB:
    """Builds in-memory lookup tables keyed on one argument of a functor.

    Assumes ``kb.query()`` yields dict-like results keyed by variable name,
    as the other examples in this guide do.
    """

    def __init__(self, kb):
        self.kb = kb
        # Maps index name -> {argument value -> list of query results}
        self.indexes = {}

    def create_index(self, name, functor, position, arity=None):
        """Create index on specific argument position.

        Args:
            name: Name under which the index is stored.
            functor: Functor whose facts are indexed.
            position: 1-based position of the argument to index on.
            arity: Number of arguments the functor takes. Defaults to
                ``position``; pass it explicitly when the functor has more
                arguments than that.

        Raises:
            ValueError: If ``position`` is not within ``1..arity``.

        Each indexed result binds the variables ``Arg1`` .. ``Arg<arity>``.
        """
        if arity is None:
            arity = position
        if not 1 <= position <= arity:
            raise ValueError(
                f"position must be between 1 and {arity}, got {position}")
        # Named variables (rather than anonymous "_") so every argument,
        # including the indexed one, appears in the result bindings.
        variables = [f"Arg{i}" for i in range(1, arity + 1)]
        key_var = f"Arg{position}"
        index = {}
        for result in self.kb.query(functor, *variables):
            index.setdefault(result[key_var], []).append(result)
        self.indexes[name] = index

    def query_indexed(self, index_name, key):
        """Return the results stored under ``key``, or [] if none/unknown."""
        return self.indexes.get(index_name, {}).get(key, [])
# Usage
ikb = IndexedKB(kb)
ikb.create_index("by_major", "student", 2)
cs_students = ikb.query_indexed("by_major", "cs")
Query Optimization¶
# Order goals from most to least selective
# Bad: Generate all pairs then filter
# (named differently from the good version so the two rules are not both
# defined for the same head)
kb.parse("""
(inefficient_query X Y) :-
(person X),
(person Y),
(age X AgeX),
(age Y AgeY),
(greater AgeX 50),
(less AgeY 30)
""")
# Good: Filter early
kb.parse("""
(efficient_query X Y) :-
(age X AgeX),
(greater AgeX 50),
(age Y AgeY),
(less AgeY 30),
(person X),
(person Y)
""")
Knowledge Validation¶
Consistency Checking¶
def check_consistency(kb):
    """Check for logical inconsistencies.

    Two checks are performed:
      * a person with more than one ``age`` fact;
      * two individuals that are each recorded as ``parent`` of the other.

    Args:
        kb: Knowledge base exposing ``query()`` and ``query_exists()``.

    Returns:
        A list of human-readable issue descriptions (empty if none). Each
        person and each pair of individuals is reported at most once.
    """
    issues = []
    # Check for conflicting facts
    checked_people = set()
    for r in kb.query("age", "X", "Age1"):
        person = r["X"]
        # The outer query yields one row per age fact, so without this guard
        # a person with N ages would be reported N times.
        if person in checked_people:
            continue
        checked_people.add(person)
        ages = list(kb.query("age", person, "Age"))
        if len(ages) > 1:
            issues.append(f"Multiple ages for {person}: {ages}")
    # Check for impossible relationships
    reported_pairs = set()
    for r in kb.query("parent", "X", "Y"):
        parent, child = r["X"], r["Y"]
        # Unordered pair: a cycle is seen from both directions, report it once
        pair = frozenset((parent, child))
        if pair in reported_pairs:
            continue
        if kb.query_exists("parent", child, parent):
            reported_pairs.add(pair)
            issues.append(f"Circular parentage: {parent} <-> {child}")
    return issues
# Usage
issues = check_consistency(kb)
if issues:
    print("Consistency issues found:")
    for issue in issues:
        print(f" - {issue}")
Schema Validation¶
class SchemaValidator:
    """Validates facts against per-functor arity and type schemas."""

    def __init__(self):
        # Maps functor -> {"arity": int, "types": list of types (or None)}
        self.schemas = {}

    def define_schema(self, functor, arity, types=None):
        """Register the expected shape of facts for ``functor``.

        Args:
            functor: Functor the schema applies to.
            arity: Required number of arguments.
            types: Optional list of expected types, one per argument
                position; a falsy entry leaves that position unchecked.
        """
        self.schemas[functor] = {
            "arity": arity,
            "types": types or []
        }

    def validate_fact(self, functor, *args):
        """Check a prospective fact against the schema of its functor.

        Returns:
            A ``(is_valid, message)`` tuple in every case, so callers can
            always unpack the result. Functors without a schema are
            accepted.
        """
        if functor not in self.schemas:
            # Return a tuple here too: a bare True would break
            # ``is_valid, msg = validator.validate_fact(...)``.
            return True, "No schema defined"
        schema = self.schemas[functor]
        # Check arity
        if len(args) != schema["arity"]:
            return False, f"Wrong arity: expected {schema['arity']}, got {len(args)}"
        # Check types if defined
        if schema["types"]:
            for i, (arg, expected_type) in enumerate(zip(args, schema["types"])):
                if expected_type and not isinstance(arg, expected_type):
                    return False, f"Arg {i}: expected {expected_type}, got {type(arg)}"
        return True, "Valid"
# Usage
validator = SchemaValidator()
validator.define_schema("age", 2, [str, int])
validator.define_schema("enrolled", 3, [str, str, str])
# Validate before adding
is_valid, msg = validator.validate_fact("age", "alice", "twenty-five")
if not is_valid:
    print(f"Invalid fact: {msg}")
Next Steps¶
- Query Evaluation - Advanced query techniques
- LLM Integration - AI-powered knowledge generation
- API Reference - Programmatic knowledge management
- Examples - Real-world knowledge bases