Neo4j Graph Database Simulation in Python code

This commit is contained in:
Donald Calloway 2025-09-28 15:45:11 -07:00
parent 7c63daa757
commit b60013e216

View File

@ -1,11 +1,55 @@
"""
Neo4j Graph Database Simulation
This module provides a simple in-memory simulation of a Neo4j-style graph database,
including node and relationship representations, a basic graph database class, and
a Cypher-like query engine for pattern matching and querying nodes and relationships.
Classes:
---------
Node:
Represents a graph node with an ID, labels, and properties.
Relationship:
Represents a directed relationship between two nodes, with a type and properties.
GraphDB:
In-memory graph database supporting node and relationship creation, matching nodes
and relationships by label/type and properties, and parsing property/where clauses
from Cypher-like queries.
CypherEngine:
Provides a basic Cypher-like query interface for matching nodes and relationships
using simplified Cypher syntax (MATCH, WHERE, RETURN).
Methods:
--------
GraphDB.add_node(node_id, labels=None, **properties):
Adds a node to the graph with the given ID, labels, and properties.
GraphDB.add_relationship(from_node, to_node, rel_type, **properties):
Adds a relationship of the given type and properties between two nodes.
GraphDB.match(label=None, property_key=None, property_value=None):
Returns nodes matching the given label and/or property key/value.
GraphDB.match_relationship(rel_type=None, from_node=None, to_node=None):
Returns relationships matching the given type and/or endpoints.
GraphDB.parse_properties(prop_str):
Parses a property string from Cypher syntax into a dictionary.
GraphDB.parse_where_clause(where_str):
Parses a WHERE clause from Cypher syntax into a list of filters.
CypherEngine.run(query):
Executes a Cypher-like query string and returns matching nodes or relationships.
Example Usage:
--------------
- Create nodes and relationships.
- Query nodes and relationships directly or using Cypher-like queries.
- Output results for demonstration.
"""
import re
# Simple representation of a node
class Node:
    """A graph node identified by an ID, carrying labels and properties.

    Attributes are set directly from the constructor arguments:
    ``id`` is the given ``node_id``, ``labels`` is a set built from the
    given iterable, and ``properties`` is the given mapping (or a fresh
    empty dict when none, or an empty one, is supplied).
    """

    def __init__(self, node_id, labels=None, properties=None):
        """Store the node ID, its label set, and its property mapping.

        Args:
            node_id: Identifier stored as ``self.id``.
            labels: Optional iterable of labels; duplicates collapse
                because the labels are kept in a set.
            properties: Optional mapping of property names to values.
        """
        self.id = node_id
        # A falsy ``labels`` (None or empty) yields an empty set.
        self.labels = set(labels) if labels else set()
        # A falsy ``properties`` is replaced by a new empty dict.
        self.properties = properties if properties else {}
# Simple representation of a relationship
class Relationship:
def __init__(self, from_node, to_node, rel_type, properties=None):
self.from_node = from_node
@ -13,6 +57,8 @@ class Relationship:
self.type = rel_type
self.properties = properties or {}
# Simple in-memory graph database
class GraphDB:
def __init__(self):
self.nodes = {} # node_id -> Node
@ -55,7 +101,7 @@ class GraphDB:
def parse_where_clause(where_str):
comparisons = re.findall(r"(\w+)\.(\w+)\s*(=|>|<|>=|<=)\s*('([^']*)'|\d+|true|false)", where_str)
filters = []
for var, key, op, raw_val, str_val in comparisons:
for _, key, op, raw_val, str_val in comparisons:
if raw_val.startswith("'"):
val = str_val
elif raw_val in ['true', 'false']:
@ -65,7 +111,7 @@ class GraphDB:
filters.append((key, op, val))
return filters
# Simple Neo4j-Style query engine
class CypherEngine:
def __init__(self, graph):
self.graph = graph
@ -74,20 +120,20 @@ class CypherEngine:
# Basic MATCH node pattern: MATCH (n:Label {key: value}) RETURN n
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\s*\{(\w+):\s*'([^']+)'\}\)\s+RETURN\s+\1", query)
if match_node:
var, label, key, value = match_node.groups()
_, label, key, value = match_node.groups()
return self.graph.match(label=label, property_key=key, property_value=value)
# (e.g., MATCH (n:Person {name: 'Alice', age: 30}) RETURN n)
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\s*\{([^}]+)\}\)\s+RETURN\s+\1", query)
if match_node:
var, label, prop_str = match_node.groups()
_, label, prop_str = match_node.groups()
props = GraphDB.parse_properties(prop_str)
return self.graph.match(label=label, **props)
# (e.g., MATCH (n:Person) WHERE n.age > 25 RETURN n)
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\)\s*(?:WHERE\s+(.+?))?\s+RETURN\s+\1", query)
if match_node:
var, label, where_str = match_node.groups()
_, label, where_str = match_node.groups()
nodes = self.graph.match(label=label)
if where_str:
filters = GraphDB.parse_where_clause(where_str)
@ -109,14 +155,18 @@ class CypherEngine:
return True
nodes = [n for n in nodes if passes(n)]
return nodes
# End of code
### Relationship patterns
# Basic MATCH relationship pattern: MATCH (a)-[:TYPE]->(b) RETURN a,b
match_rel = re.match(r"MATCH\s+\((\w+)\)-\[:(\w+)\]->\((\w+)\)\s+RETURN\s+\1,\s*\3", query)
# MATCH (a:Person)-[:FRIEND]->(b:Person) RETURN a,b
match_rel = re.match(r"MATCH\s+\((\w+):(\w+)?\)-\[:(\w+)\]->\((\w+):(\w+)?\)\s+RETURN\s+\1,\s*\4", query)
if match_rel:
from_var, from_label, rel_type, to_var, to_label = match_rel.groups()
_, from_label, rel_type, _, to_label = match_rel.groups()
rels = self.graph.match_relationship(rel_type=rel_type)
return [
rel for rel in rels
@ -125,34 +175,68 @@ class CypherEngine:
]
return "Unsupported query format"
# Example usage and test cases
### Example usage and test cases
# Create graph and add nodes/relationships
g = GraphDB()
# Three Person nodes (A, D, B) and one City node (C).
g.add_node("A", labels=["Person"], name="Alice", age=30)
g.add_node("D", labels=["Person"], name="Justin", age=36)
g.add_node("B", labels=["Person"], name="Bob", age=25)
g.add_node("C", labels=["City"], name="Phoenix")
# NOTE(review): two LIVES_IN relationships from "A" to "C" are added below
# (one without properties, one with name="Phoenix"). These look like the old
# and new revisions of the same statement -- confirm only one is intended.
g.add_relationship("A", "B", "FRIEND", since=2020)
g.add_relationship("A", "C", "LIVES_IN")
g.add_relationship("B", "A", "FRIEND", since=2020)
g.add_relationship("B", "D", "FRIEND", since=2023)
g.add_relationship("A", "C", "LIVES_IN", name="Phoenix")
### Direct graph queries
# Match nodes with label 'Person'
people = g.match(label="Person")
# Match relationships of type 'FRIEND'
friendships = g.match_relationship(rel_type="FRIEND")
# Run three Cypher-like queries through the engine; results are printed below
# as "Result 1" .. "Result 3".
engine = CypherEngine(g)
result1 = engine.run("MATCH (n:Person {name: 'Alice'}) RETURN n")
result2 = engine.run("MATCH (n:Person) WHERE n.age > 25 RETURN n")
result3 = engine.run("MATCH (a:Person)-[:FRIEND]->(b:Person) RETURN a,b")
print("People:", [(p.id, p.properties) for p in people])
# Match relationships of type 'LIVES_IN'
locations = g.match_relationship(rel_type="LIVES_IN")
# Match people older than 25
Older_Than_25 = [n for n in g.match(label="Person") if n.properties.get("age", 0) > 25]
### Neo4j-Style Cypher queries
# The assignments below rebind `people`, `friendships`, `locations` and
# `Older_Than_25`, replacing the direct-query results assigned above.
engine = CypherEngine(g) # Initialize engine with graph g
people = engine.run("MATCH (n:Person) RETURN n") # Match all persons
friendships = engine.run("MATCH (a:Person)-[:FRIEND]->(b:Person) RETURN a,b") # Match friendships
# NOTE(review): `locations` is assigned twice in a row; the City-node result
# on the first line is overwritten by the LIVES_IN relationship result on the
# second before it is ever read.
locations = engine.run("MATCH (n:City) RETURN n") # Match all cities
locations = engine.run("MATCH (a:Person)-[:LIVES_IN]->(b:City) RETURN a,b") # Match who lives where
Older_Than_25 = engine.run("MATCH (n:Person) WHERE n.age > 25 RETURN n") # People older than 25
### Output results
print("People:", [(p.id, p.properties) for p in people])
print()
print("Friendships:", [ (rel.from_node, rel.to_node, rel.properties) for rel in friendships])
print("Result 1:", [(n.id, n.properties) for n in result1])
print("Result 2:", [(n.id, n.properties) for n in result2])
print("Result 3:", [ (rel.from_node, rel.to_node, rel.properties) for rel in result3])
print()
print("People w/Age > 25:", [(n.id, n.properties) for n in Older_Than_25])
print()
print("People who live in Phoenix:", [(rel.from_node, rel.to_node, rel.properties) for rel in locations])
print()
# NOTE(review): at this point `locations` holds the LIVES_IN relationship
# query result, so this prints relationship properties rather than City node
# properties -- confirm which result this line is meant to show.
print("City(ies):", [(n.properties) for n in locations])
# Expected Output:
# People: [('A', {'name': 'Alice', 'age': 30}), ('B', {'name': 'Bob', 'age': 25})]
# Friendships: [('A', 'B', {'since': 2020})]
# Result 1: [('A', {'name': 'Alice', 'age': 30})]
# Result 2: [('A', {'name': 'Alice', 'age': 30})]
# Result 3: [('A', 'B', {'since': 2020})]
'''
Expected Output:
People: [('A', {'name': 'Alice', 'age': 30}), ('D', {'name': 'Justin', 'age': 36}), ('B', {'name': 'Bob', 'age': 25})]
Friendships: [('A', 'B', {'since': 2020}), ('B', 'A', {'since': 2020}), ('B', 'D', {'since': 2023})]
People w/Age > 25: [('A', {'name': 'Alice', 'age': 30}), ('D', {'name': 'Justin', 'age': 36})]
People who live in Phoenix: [('A', 'C', {'name': 'Phoenix'})]
City(ies): [{'name': 'Phoenix'}]
'''