Projects/Neo4j_graph_database_simulation.py

243 lines
9.7 KiB
Python

"""
Neo4j Graph Database Simulation
This module provides a simple in-memory simulation of a Neo4j-style graph database,
including node and relationship representations, a basic graph database class, and
a Cypher-like query engine for pattern matching and querying nodes and relationships.
Classes:
---------
Node:
Represents a graph node with an ID, labels, and properties.
Relationship:
Represents a directed relationship between two nodes, with a type and properties.
GraphDB:
In-memory graph database supporting node and relationship creation, matching nodes
and relationships by label/type and properties, and parsing property/where clauses
from Cypher-like queries.
CypherEngine:
Provides a basic Cypher-like query interface for matching nodes and relationships
using simplified Cypher syntax (MATCH, WHERE, RETURN).
Functions:
----------
GraphDB.add_node(node_id, labels=None, **properties):
Adds a node to the graph with the given ID, labels, and properties.
GraphDB.add_relationship(from_node, to_node, rel_type, **properties):
Adds a relationship of the given type and properties between two nodes.
GraphDB.match(label=None, property_key=None, property_value=None):
Returns nodes matching the given label and/or property key/value.
GraphDB.match_relationship(rel_type=None, from_node=None, to_node=None):
Returns relationships matching the given type and/or endpoints.
GraphDB.parse_properties(prop_str):
Parses a property string from Cypher syntax into a dictionary.
GraphDB.parse_where_clause(where_str):
Parses a WHERE clause from Cypher syntax into a list of filters.
CypherEngine.run(query):
Executes a Cypher-like query string and returns matching nodes or relationships.
Example Usage:
--------------
- Create nodes and relationships.
- Query nodes and relationships directly or using Cypher-like queries.
- Output results for demonstration.
"""
import re
# Simple representation of a node
class Node:
def __init__(self, node_id, labels=None, properties=None):
self.id = node_id
self.labels = set(labels or [])
self.properties = properties or {}
# Simple representation of a relationship
class Relationship:
def __init__(self, from_node, to_node, rel_type, properties=None):
self.from_node = from_node
self.to_node = to_node
self.type = rel_type
self.properties = properties or {}
# Simple in-memory graph database
class GraphDB:
def __init__(self):
self.nodes = {} # node_id -> Node
self.relationships = [] # list of Relationship objects
def add_node(self, node_id, labels=None, **properties):
self.nodes[node_id] = Node(node_id, labels, properties)
def add_relationship(self, from_node, to_node, rel_type, **properties):
if from_node in self.nodes and to_node in self.nodes:
self.relationships.append(Relationship(from_node, to_node, rel_type, properties))
def match(self, label=None, property_key=None, property_value=None):
return [
node for node in self.nodes.values()
if (not label or label in node.labels) and
(not property_key or node.properties.get(property_key) == property_value)
]
def match_relationship(self, rel_type=None, from_node=None, to_node=None):
return [
rel for rel in self.relationships
if (not rel_type or rel.type == rel_type) and
(not from_node or rel.from_node == from_node) and
(not to_node or rel.to_node == to_node)
]
def parse_properties(prop_str):
props = {}
for pair in re.findall(r"(\w+):\s*('([^']*)'|\d+|true|false)", prop_str):
key, raw_val, str_val = pair
if raw_val.startswith("'"):
props[key] = str_val
elif raw_val in ['true', 'false']:
props[key] = raw_val == 'true'
else:
props[key] = int(raw_val)
return props
def parse_where_clause(where_str):
comparisons = re.findall(r"(\w+)\.(\w+)\s*(=|>|<|>=|<=)\s*('([^']*)'|\d+|true|false)", where_str)
filters = []
for _, key, op, raw_val, str_val in comparisons:
if raw_val.startswith("'"):
val = str_val
elif raw_val in ['true', 'false']:
val = raw_val == 'true'
else:
val = int(raw_val)
filters.append((key, op, val))
return filters
# Simple Neo4j-Style query engine
class CypherEngine:
def __init__(self, graph):
self.graph = graph
def run(self, query):
# Basic MATCH node pattern: MATCH (n:Label {key: value}) RETURN n
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\s*\{(\w+):\s*'([^']+)'\}\)\s+RETURN\s+\1", query)
if match_node:
_, label, key, value = match_node.groups()
return self.graph.match(label=label, property_key=key, property_value=value)
# (e.g., MATCH (n:Person {name: 'Alice', age: 30}) RETURN n)
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\s*\{([^}]+)\}\)\s+RETURN\s+\1", query)
if match_node:
_, label, prop_str = match_node.groups()
props = GraphDB.parse_properties(prop_str)
return self.graph.match(label=label, **props)
# (e.g., MATCH (n:Person) WHERE n.age > 25 RETURN n)
match_node = re.match(r"MATCH\s+\((\w+):(\w+)\)\s*(?:WHERE\s+(.+?))?\s+RETURN\s+\1", query)
if match_node:
_, label, where_str = match_node.groups()
nodes = self.graph.match(label=label)
if where_str:
filters = GraphDB.parse_where_clause(where_str)
def passes(node):
for key, op, val in filters:
node_val = node.properties.get(key)
if node_val is None:
return False
if op == '=' and node_val != val:
return False
if op == '>' and not node_val > val:
return False
if op == '<' and not node_val < val:
return False
if op == '>=' and not node_val >= val:
return False
if op == '<=' and not node_val <= val:
return False
return True
nodes = [n for n in nodes if passes(n)]
return nodes
# End of code
### Relationship patterns
# Basic MATCH relationship pattern: MATCH (a)-[:TYPE]->(b) RETURN a,b
match_rel = re.match(r"MATCH\s+\((\w+)\)-\[:(\w+)\]->\((\w+)\)\s+RETURN\s+\1,\s*\3", query)
# MATCH (a:Person)-[:FRIEND]->(b:Person) RETURN a,b
match_rel = re.match(r"MATCH\s+\((\w+):(\w+)?\)-\[:(\w+)\]->\((\w+):(\w+)?\)\s+RETURN\s+\1,\s*\4", query)
if match_rel:
_, from_label, rel_type, _, to_label = match_rel.groups()
rels = self.graph.match_relationship(rel_type=rel_type)
return [
rel for rel in rels
if (not from_label or from_label in self.graph.nodes[rel.from_node].labels) and
(not to_label or to_label in self.graph.nodes[rel.to_node].labels)
]
return "Unsupported query format"
### Example usage and test cases
# Create graph and add nodes/relationships
g = GraphDB()
g.add_node("A", labels=["Person"], name="Alice", age=30)
g.add_node("D", labels=["Person"], name="Justin", age=36)
g.add_node("B", labels=["Person"], name="Bob", age=25)
g.add_node("C", labels=["City"], name="Phoenix")
g.add_relationship("A", "B", "FRIEND", since=2020)
g.add_relationship("B", "A", "FRIEND", since=2020)
g.add_relationship("B", "D", "FRIEND", since=2023)
g.add_relationship("A", "C", "LIVES_IN", name="Phoenix")
### Direct graph queries
# Match nodes with label 'Person'
people = g.match(label="Person")
# Match relationships of type 'FRIEND'
friendships = g.match_relationship(rel_type="FRIEND")
# Match relationships of type 'LIVES_IN'
locations = g.match_relationship(rel_type="LIVES_IN")
# Match people older than 25
Older_Than_25 = [n for n in g.match(label="Person") if n.properties.get("age", 0) > 25]
### Neo4j-Style Cypher queries
engine = CypherEngine(g) # Initialize engine with graph g
people = engine.run("MATCH (n:Person) RETURN n") # Match all persons
friendships = engine.run("MATCH (a:Person)-[:FRIEND]->(b:Person) RETURN a,b") # Match friendships
locations = engine.run("MATCH (n:City) RETURN n") # Match all cities
locations = engine.run("MATCH (a:Person)-[:LIVES_IN]->(b:City) RETURN a,b") # Match who lives where
Older_Than_25 = engine.run("MATCH (n:Person) WHERE n.age > 25 RETURN n") # People older than 25
### Output results
print("People:", [(p.id, p.properties) for p in people])
print()
print("Friendships:", [ (rel.from_node, rel.to_node, rel.properties) for rel in friendships])
print()
print("People w/Age > 25:", [(n.id, n.properties) for n in Older_Than_25])
print()
print("People who live in Phoenix:", [(rel.from_node, rel.to_node, rel.properties) for rel in locations])
print()
print("City(ies):", [(n.properties) for n in locations])
'''
Expected Output:
People: [('A', {'name': 'Alice', 'age': 30}), ('D', {'name': 'Justin', 'age': 36}), ('B', {'name': 'Bob', 'age': 25})]
Friendships: [('A', 'B', {'since': 2020}), ('B', 'A', {'since': 2020}), ('B', 'D', {'since': 2023})]
People w/Age > 25: [('A', {'name': 'Alice', 'age': 30}), ('D', {'name': 'Justin', 'age': 36})]
People who live in Phoenix: [('A', 'C', {'name': 'Phoenix'})]
City(ies): [{'name': 'Phoenix'}]
'''