generated from amosproj/amos202Xss0Y-projname
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #154 from amosproj/develop
Develop
- Loading branch information
Showing
26 changed files
with
2,440 additions
and
813 deletions.
There are no files selected for viewing
186 changes: 186 additions & 0 deletions
186
Project/backend/codebase/graph_analysis/graph_analysis.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
import networkx as nx | ||
import os | ||
import json | ||
|
||
|
||
def analyze_graph_structure(G): | ||
"""Analyzes the structure of a knowledge graph and provides hopefully useful information. | ||
Currently, I am not sure how to use most of the information, but we may find a way to use it | ||
Args: | ||
G: A networkx graph. | ||
Returns: | ||
A dictionary containing information about the graph's structure. | ||
""" | ||
|
||
# Basic Graph Statistics | ||
num_nodes = G.number_of_nodes() # Total number of nodes | ||
num_edges = G.number_of_edges() # Total number of edges | ||
density = nx.density(G) # Ratio of actual edges to possible edges (0 to 1) | ||
average_degree = 2 * num_edges / num_nodes # Average number of edges per node | ||
|
||
# Degree Distribution | ||
degree_distribution = dict(G.degree()) | ||
# Degree distribution can indicate the presence of hubs or important nodes | ||
|
||
degree_centrality = nx.degree_centrality(G) | ||
|
||
""" Centrality Measures | ||
- Degree Centrality: Measures node connectivity | ||
- Nodes with high degree centrality are important in the network | ||
Examples: 3 nodes are connected in a line | ||
node1 - node2 - node3 | ||
- Degree: node1 = 1, node2 = 2, node3 = 1 | ||
- Degree Centrality: node1 = 0.33(1/3), node2 = 0.66(2/3), node3 = 0.33(1/3) | ||
""" | ||
|
||
betweenness_centrality = nx.betweenness_centrality(G) | ||
|
||
""" | ||
- Betweenness Centrality: Measures node's control over information flow | ||
- Nodes with high betweenness centrality are important in the network | ||
Examples: 4 nodes are connected | ||
1 | ||
/ \ | ||
2-----3 | ||
\ / | ||
4 | ||
- Here, node 2 has the highest betweenness centrality because it lies on the shortest path between all other nodes | ||
- if node in not between any other nodes, its betweenness centrality is 0 | ||
- Betweenness Centrality show the dependency of the network on a node | ||
""" | ||
|
||
# - Closeness Centrality: Measures average length of the shortest path from a node to all other nodes | ||
closeness_centrality = nx.closeness_centrality(G) | ||
|
||
""" | ||
- Closeness Centrality: Measures average length of the shortest path from a node to all other nodes | ||
- Nodes with high closeness centrality are important in the network | ||
Examples: 4 nodes are connected | ||
0 | ||
/ | \ | ||
2--1--3 | ||
- Here, node 0, 1 (1.0) has the highest closeness centrality because it is connected to all other nodes (node 2, 3 = 0.75) | ||
- Closeness Centrality show the average distance of a node to all other nodes in the network | ||
""" | ||
|
||
# - Eigenvector Centrality: Measures influence of a node in a network | ||
eigenvector_centrality = nx.eigenvector_centrality(G) | ||
|
||
""" | ||
- Eigenvector Centrality: Measures influence of a node in a network | ||
- Nodes with high eigenvector centrality are important in the network | ||
Examples: 4 nodes are connected | ||
1 | ||
/ \ | ||
2-----3 | ||
\ / | ||
4 | ||
- Here, node 3 has the highest eigenvector centrality because it is connected to node 2 which has high eigenvector centrality | ||
- Eigenvector Centrality show the influence of a node in the network | ||
- Eigenvector Centrality is similar to PageRank algorithm | ||
- in this measure every node has some values and the values are updated based on the values of the connected nodes (node value != 0) | ||
""" | ||
|
||
# Community Structure | ||
# - Louvain Algorithm (for community detection) | ||
communities = list(nx.community.greedy_modularity_communities(G)) | ||
community_sizes = [len(community) for community in communities] | ||
num_communities = len(communities) | ||
# Communities can reveal modular structures in the graph | ||
""" | ||
- Community Detection: Identifying groups of nodes that are more connected to each other than to the rest of the network | ||
- Communities can reveal modular structures in the graph | ||
- Communities can be used to identify groups of nodes that are more connected to each other than to the rest of the network | ||
Examples: 7 nodes are connected | ||
1 | ||
/ \ | ||
2-----3 | ||
\ / 5 | ||
4-----/ \ | ||
6-----7 | ||
- Here, nodes 1, 2, 3, 4 are in one community and nodes 5, 6, 7 are in another community | ||
""" | ||
|
||
# Graph Connectivity | ||
# - Check if the graph is connected | ||
is_connected = nx.is_connected(G) | ||
# - Calculate diameter: Longest shortest path between any two nodes | ||
diameter = nx.diameter(G) if is_connected else float('inf') | ||
# - Average shortest path length: Average of all shortest paths in the graph | ||
average_shortest_path_length = nx.average_shortest_path_length(G) if is_connected else float('inf') | ||
|
||
# Clustering Coefficient | ||
# - Measures the degree to which nodes tend to cluster together | ||
average_clustering_coefficient = nx.average_clustering(G) | ||
|
||
# Assortativity | ||
# - Measures the similarity of connections in the graph with respect to node degree | ||
assortativity = nx.degree_assortativity_coefficient(G) | ||
|
||
# Graph Diameter and Radius | ||
# - Diameter: Longest shortest path in the graph | ||
# - Radius: Minimum eccentricity of any node | ||
radius = nx.radius(G) if is_connected else float('inf') | ||
|
||
# Graph Transitivity | ||
# - Measures the overall probability for the network to have adjacent nodes interconnected | ||
transitivity = nx.transitivity(G) | ||
|
||
# Return a dictionary containing the structural information | ||
graph_info = { | ||
"num_nodes": num_nodes, | ||
"num_edges": num_edges, | ||
"density": density, | ||
"average_degree": average_degree, | ||
"degree_distribution": degree_distribution, | ||
"degree_centrality": degree_centrality, | ||
"betweenness_centrality": betweenness_centrality, | ||
"closeness_centrality": closeness_centrality, | ||
"eigenvector_centrality": eigenvector_centrality, | ||
"num_communities": num_communities, | ||
"community_sizes": community_sizes, | ||
"is_connected": is_connected, | ||
"diameter": diameter, | ||
"average_shortest_path_length": average_shortest_path_length, | ||
"average_clustering_coefficient": average_clustering_coefficient, | ||
"assortativity": assortativity, | ||
"radius": radius, | ||
"transitivity": transitivity | ||
} | ||
|
||
return graph_info | ||
|
||
|
||
def print_graph_info(graph_info): | ||
"""Prints the graph information in a formatted and readable way. | ||
Args: | ||
graph_info: A dictionary containing information about the graph's structure. | ||
""" | ||
|
||
print(json.dumps(graph_info, indent=4)) | ||
|
||
|
||
graph_directory = os.fsencode("../.media/graphs/") | ||
|
||
with os.scandir("./Project/backend/codebase/.media/graphs/") as it: | ||
for entry in it: | ||
if entry.name.endswith(".gml") and entry.is_file(): | ||
print("-----------------------") | ||
print(f"Filename: {entry.name}") | ||
graph = nx.read_gml(entry.path) | ||
graph_info = analyze_graph_structure(graph) | ||
print_graph_info(graph_info) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
Project/backend/codebase/graph_creator/services/query_graph.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import os | ||
|
||
import networkx as nx | ||
import spacy | ||
from langchain.chains.graph_qa.base import GraphQAChain | ||
from langchain_community.graphs import NetworkxEntityGraph | ||
from langchain_groq import ChatGroq | ||
from networkx import NetworkXError | ||
|
||
from graph_creator.llama3 import configure_groq | ||
from graph_creator.schemas.graph_vis import GraphQueryOutput | ||
|
||
|
||
class GraphQuery: | ||
|
||
def query_graph(self, graph: nx.Graph, query: str) -> GraphQueryOutput: | ||
entities_from_llm = self.retrieve_entities_from_llm(query) | ||
entities_from_spacy = self.retrieve_entities_from_spacy(query) | ||
all_entities = set() | ||
all_entities.update(entities_from_llm) | ||
all_entities.update(entities_from_spacy) | ||
|
||
entities_relationships = {} | ||
|
||
for node in all_entities: | ||
edges_info = [] | ||
try: | ||
for neighbor in graph.neighbors(node): | ||
edge_data = graph.get_edge_data(node, neighbor) | ||
relationship = edge_data.get('relation', 'is connected to') | ||
edges_info.append((relationship, neighbor)) | ||
entities_relationships[node] = edges_info | ||
except NetworkXError: | ||
continue | ||
|
||
return GraphQueryOutput( | ||
llm_nodes=entities_from_llm, | ||
spacy_nodes=entities_from_spacy, | ||
retrieved_info=entities_relationships, | ||
) | ||
|
||
@staticmethod | ||
def retrieve_entities_from_llm(query: str): | ||
groq_client = configure_groq() | ||
SYS_PROMPT = """ | ||
The user has a knowledge graph and wants to query it. For that he needs entities. | ||
Your task is to extract all entities from the below query. | ||
Format you answer as below example: | ||
entity1, entity2, entity3 | ||
""" | ||
USER_PROMPT = f"user input: ```{query}``` \n\n output: " | ||
chat_completion = groq_client.chat.completions.create( | ||
messages=[ | ||
{"role": "system", "content": SYS_PROMPT}, | ||
{"role": "user", "content": USER_PROMPT}, | ||
], | ||
model="llama3-8b-8192", | ||
) | ||
response = chat_completion.choices[0].message.content | ||
return [entity.strip() for entity in response.split(",")] | ||
|
||
@staticmethod | ||
def retrieve_entities_from_spacy(query: str): | ||
nlp = spacy.load('en_core_web_sm') | ||
doc = nlp(query) | ||
|
||
entities = [(ent.text, ent.label_) for ent in doc.ents] | ||
return entities | ||
|
||
def query_graph_via_langchain(self, query: str, graph_path: str): | ||
graph1 = NetworkxEntityGraph.from_gml(graph_path) | ||
chain = GraphQAChain.from_llm( | ||
ChatGroq( | ||
temperature=0, | ||
model="llama3-8b-8192", | ||
api_key=os.getenv("GROQ_API_KEY") | ||
), graph=graph1, verbose=True | ||
) | ||
response = chain.invoke(query) | ||
return response |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.