Skip to content

Commit

Permalink
In_component out_component (#1790)
Browse files Browse the repository at this point in the history
* Added first version of functions

* node tests

* Adding persistent graph test

* Added graphql tests

* new 83 fmt

* Added no-op for eventgraph/persistentgraph

* Add individual in-component and out-component algorithms

* Added in-component out-component functions

* Tidy lucas comments

* fmt

* sort imports

---------

Co-authored-by: Ben Steer <[email protected]>
Co-authored-by: Ben Steer <[email protected]>
Co-authored-by: Ben Steer <[email protected]>
  • Loading branch information
4 people authored Oct 2, 2024
1 parent 08ed007 commit 318dc9a
Show file tree
Hide file tree
Showing 24 changed files with 434 additions and 98 deletions.
18 changes: 18 additions & 0 deletions python/python/raphtory/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1768,6 +1768,14 @@ class Graph:
path (str): The path to the file.
"""

def save_to_zip(self, path):
    """
    Saves the Graph to the given path.

    Arguments:
        path (str): The path to the file.
    """

def serialise(self):
"""
Serialise Graph to bytes.
Expand Down Expand Up @@ -1941,6 +1949,7 @@ class Graph:
graph_template=None,
node_template=None,
edge_template=None,
graph_name=None,
verbose=False,
):
"""
Expand Down Expand Up @@ -4696,6 +4705,14 @@ class PersistentGraph:
path (str): The path to the file.
"""

def save_to_zip(self, path):
    """
    Saves the PersistentGraph to the given path.

    Arguments:
        path (str): The path to the file.
    """

def serialise(self):
"""
Serialise PersistentGraph to bytes.
Expand Down Expand Up @@ -4870,6 +4887,7 @@ class PersistentGraph:
graph_template=None,
node_template=None,
edge_template=None,
graph_name=None,
verbose=False,
):
"""
Expand Down
22 changes: 22 additions & 0 deletions python/python/raphtory/algorithms/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,17 @@ def hits(g, iter_count=20, threads=None):
An AlgorithmResult object containing the mapping from node ID to the hub and authority score of the node
"""

def in_component(node):
    """
    In component -- Finding the "in-component" of a node in a directed graph involves identifying all nodes that can be reached following only incoming edges.

    Arguments:
        node (Node) : The node whose in-component we wish to calculate

    Returns:
        An array containing the Nodes within the given node's in-component
    """

def in_components(g):
"""
In components -- Finding the "in-component" of a node in a directed graph involves identifying all nodes that can be reached following only incoming edges.
Expand Down Expand Up @@ -397,6 +408,17 @@ def min_out_degree(g):
int : value of the smallest outdegree
"""

def out_component(node):
    """
    Out component -- Finding the "out-component" of a node in a directed graph involves identifying all nodes that can be reached following only outgoing edges.

    Arguments:
        node (Node) : The node whose out-component we wish to calculate

    Returns:
        An array containing the Nodes within the given node's out-component
    """

def out_components(g):
"""
Out components -- Finding the "out-component" of a node in a directed graph involves identifying all nodes that can be reached following only outgoing edges.
Expand Down
57 changes: 24 additions & 33 deletions python/python/raphtory/graphql/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,35 @@ class GraphServer:
port (int): The port to use (defaults to 1736).
"""

def start(self, port=1736, timeout_ms=None):
def set_embeddings(
self,
cache,
embedding=None,
graph_template=None,
node_template=None,
edge_template=None,
):
"""
Start the server and return a handle to it.
Setup the server to vectorise graphs with a default template.
Arguments:
port (int): the port to use (defaults to 1736).
timeout_ms (int): wait for server to be online (defaults to 5000). The server is stopped if not online within timeout_ms but manages to come online as soon as timeout_ms finishes!
"""
cache (str): the directory to use as cache for the embeddings.
embedding (Function): the embedding function to translate documents to embeddings.
graph_template (String): the template to use for graphs.
node_template (String): the template to use for nodes.
edge_template (String): the template to use for edges.
def with_document_search_function(self, name, input, function):
Returns:
GraphServer: A new server object with embeddings setup.
"""
Register a function in the GraphQL schema for document search over a graph.

The function needs to take a `VectorisedGraph` as the first argument followed by a
pre-defined set of keyword arguments. Supported types are `str`, `int`, and `float`.
They have to be specified using the `input` parameter as a dict where the keys are the
names of the parameters and the values are the types, expressed as strings.
def start(self, port=1736, timeout_ms=None):
"""
Start the server and return a handle to it.
Arguments:
name (str): The name of the function in the GraphQL schema.
input (dict): The keyword arguments expected by the function.
function (Function): the function to run.
Returns:
GraphServer: A new server object containing the vectorised graphs.
port (int): the port to use (defaults to 1736).
timeout_ms (int): wait for server to be online (defaults to 5000). The server is stopped if not online within timeout_ms but manages to come online as soon as timeout_ms finishes!
"""

def with_global_search_function(self, name, input, function):
Expand All @@ -74,30 +78,17 @@ class GraphServer:
function (Function): the function to run.
Returns:
GraphServer: A new server object containing the vectorised graphs.
GraphServer: A new server object with the function registered
"""

def with_vectorised(
self,
cache,
graph_names=None,
embedding=None,
graph_template=None,
node_template=None,
edge_template=None,
def with_vectorised_graphs(
self, graph_names, graph_template=None, node_template=None, edge_template=None
):
"""
Vectorise a subset of the graphs of the server.
Note:
If no embedding function is provided, the server will attempt to use the OpenAI API
embedding model, which will only work if the env variable OPENAI_API_KEY is set
appropriately
Arguments:
graph_names (List[str]): the names of the graphs to vectorise. All by default.
cache (str): the directory to use as cache for the embeddings.
embedding (Function): the embedding function to translate documents to embeddings.
graph_template (String): the template to use for graphs.
node_template (String): the template to use for nodes.
edge_template (String): the template to use for edges.
Expand Down
76 changes: 76 additions & 0 deletions python/tests/graphql/misc/test_components.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from raphtory.graphql import RaphtoryClient
from raphtory.graphql import GraphServer
from raphtory import Graph
import tempfile


def test_in_out_components():
    """Check the `inComponent` / `outComponent` node fields served over GraphQL.

    A six-edge graph is saved under a temporary work directory, served with
    `GraphServer`, and queried for node "3" on the full graph, node "3" in
    `window(start:1,end:6)`, and node "4" in `at(time:4)`. Component lists
    are sorted by name on both sides before comparing, so the order in which
    the server returns component members does not matter.
    """

    def sort_components(data):
        # Sort whichever component lists are present, in place, by node name.
        for key in ("inComponent", "outComponent"):
            if key in data:
                data[key] = sorted(data[key], key=lambda x: x["name"])

    def prepare_for_comparison(structure):
        # The top-level node sits directly under "node"; the windowed views
        # nest their node one level deeper.
        if "node" in structure:
            sort_components(structure["node"])
        for view in ("window", "at"):
            if view in structure:
                sort_components(structure[view]["node"])

    query = """
{
graph(path: "graph") {
node(name: "3") {
inComponent {
name
}
outComponent {
name
}
}
window(start:1,end:6){
node(name:"3"){
inComponent{
name
}
}
}
at(time:4){
node(name:"4"){
outComponent{
name
}
}
}
}
}
"""
    result = {
        "graph": {
            "node": {
                "inComponent": [{"name": "7"}, {"name": "1"}],
                "outComponent": [{"name": "6"}, {"name": "4"}, {"name": "5"}],
            },
            "window": {"node": {"inComponent": [{"name": "1"}]}},
            "at": {"node": {"outComponent": [{"name": "5"}]}},
        }
    }

    work_dir = tempfile.mkdtemp()
    g = Graph()
    # Each tuple is passed positionally to Graph.add_edge.
    edges = [(1, 1, 2), (2, 1, 3), (3, 3, 4), (4, 4, 5), (5, 3, 6), (6, 7, 3)]
    for edge in edges:
        g.add_edge(*edge)

    g.save_to_file(work_dir + "/graph")
    with GraphServer(work_dir).start():
        client = RaphtoryClient("http://localhost:1736")
        query_res = client.query(query)
        prepare_for_comparison(query_res["graph"])
        prepare_for_comparison(result["graph"])
        assert query_res == result
41 changes: 26 additions & 15 deletions python/tests/graphql/misc/test_graphql_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,22 @@
from raphtory.graphql import GraphServer, RaphtoryClient
from raphtory import Graph


def embedding(texts):
    """Return a toy two-dimensional embedding for each text.

    Arguments:
        texts (Iterable[str]): the documents to embed.

    Returns:
        list[list[int]]: one `[count of "a", count of "b"]` pair per text,
        in input order.
    """
    return [[text.count("a"), text.count("b")] for text in texts]


def test_embedding():
    """Sanity-check the toy `embedding` helper on a few short strings."""
    expected = [[3, 0], [0, 1], [1, 1], [1, 1]]
    result = embedding(texts=["aaa", "b", "ab", "ba"])
    assert result == expected


def setup_graph(g):
    """Populate `g` with the fixture data used by the vector-search tests.

    Sets the constant property `name` to "abb", then calls
    `add_node(1, "aab")` and `add_edge(1, "aab", "bbb")` on the graph.

    Arguments:
        g: the graph object to populate (mutated in place).
    """
    g.update_constant_properties({"name": "abb"})
    g.add_node(1, "aab")
    g.add_edge(1, "aab", "bbb")


def assert_correct_documents(client):
query = """{
plugins {
Expand All @@ -38,34 +42,38 @@ def assert_correct_documents(client):
}"""
result = client.query(query)
assert result == {
'plugins': {
'globalSearch': [
"plugins": {
"globalSearch": [
{
'content': 'abb',
'embedding': [1.0, 2.0],
'entityType': 'graph',
'name': ['abb'],
"content": "abb",
"embedding": [1.0, 2.0],
"entityType": "graph",
"name": ["abb"],
},
],
},
'vectorisedGraph': {
'algorithms': {
'similaritySearch': [{
'content': 'aab',
'embedding': [2.0, 1.0],
'entityType': 'node',
'name': ['aab']}]
"vectorisedGraph": {
"algorithms": {
"similaritySearch": [
{
"content": "aab",
"embedding": [2.0, 1.0],
"entityType": "node",
"name": ["aab"],
}
]
}
}
},
}


def setup_server(work_dir):
    """Build a `GraphServer` over `work_dir` with embeddings configured.

    The server is set up through `set_embeddings` with the toy `embedding`
    function defined in this module, a fixed cache directory, and templates
    that render a node as its name and a graph as its `name` property.

    Arguments:
        work_dir (str): the server's working directory.

    Returns:
        The server object returned by `set_embeddings`.
    """
    server = GraphServer(work_dir)
    # `graph_template` must be passed exactly once: repeating a keyword
    # argument in a call is a SyntaxError.
    return server.set_embeddings(
        cache="/tmp/graph-cache",
        embedding=embedding,
        node_template="{{ name }}",
        graph_template="{{ props.name }}",
    )

Expand All @@ -81,6 +89,7 @@ def test_new_graph():
setup_graph(rg)
assert_correct_documents(client)


def test_upload_graph():
print("test_upload_graph")
work_dir = tempfile.mkdtemp()
Expand All @@ -95,6 +104,7 @@ def test_upload_graph():
client.upload_graph(path="abb", file_path=g_path, overwrite=True)
assert_correct_documents(client)


def test_include_graph():
work_dir = tempfile.mkdtemp()
g_path = work_dir + "/abb"
Expand All @@ -106,5 +116,6 @@ def test_include_graph():
client = RaphtoryClient("http://localhost:1736")
assert_correct_documents(client)


# Only run these directly when the file is executed as a script. Unguarded
# module-level calls would also fire on import, i.e. during pytest
# collection, in addition to pytest running the tests themselves.
if __name__ == "__main__":
    test_upload_graph()
    test_include_graph()
34 changes: 34 additions & 0 deletions python/tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,23 @@ def test_in_components():
assert actual == expected


def test_in_component():
    """Check `algorithms.in_component` on a single node.

    Covers node 3 on the full graph and the same node restricted with
    `window(1, 6)`. Results are compared as sets, so the order of the
    returned nodes is not asserted.
    """
    g = Graph()
    # Each tuple is passed positionally to Graph.add_edge.
    edges = [(1, 1, 2), (2, 1, 3), (3, 3, 4), (4, 4, 5), (5, 3, 6), (6, 7, 3)]
    for edge in edges:
        g.add_edge(*edge)

    actual = algorithms.in_component(g.node(3))
    correct = [g.node(7), g.node(1)]
    assert set(actual) == set(correct)

    actual = algorithms.in_component(g.node(3).window(1, 6))
    correct = [g.node(1)]
    assert set(actual) == set(correct)


def test_out_components():
g = gen_graph()
actual = algorithms.out_components(g).get_all_with_names()
Expand All @@ -75,6 +92,23 @@ def test_out_components():
assert actual == expected


def test_out_component():
    """Check `algorithms.out_component` on a single node.

    Covers node 3 on the full graph and node 4 restricted with `at(4)`.
    Results are compared as sets, so the order of the returned nodes is not
    asserted.
    """
    g = Graph()
    # Each tuple is passed positionally to Graph.add_edge.
    edges = [(1, 1, 2), (2, 1, 3), (3, 3, 4), (4, 4, 5), (5, 3, 6), (6, 7, 3)]
    for edge in edges:
        g.add_edge(*edge)

    actual = algorithms.out_component(g.node(3))
    correct = [g.node(4), g.node(5), g.node(6)]
    assert set(actual) == set(correct)

    actual = algorithms.out_component(g.node(4).at(4))
    correct = [g.node(5)]
    assert set(actual) == set(correct)


def test_empty_algo():
g = Graph()
assert algorithms.weakly_connected_components(g, 20).get_all_with_names() == {}
Expand Down
Loading

0 comments on commit 318dc9a

Please sign in to comment.